diff --git a/libs/eigen/.clang-format b/libs/eigen/.clang-format new file mode 100644 index 0000000..28251c6 --- /dev/null +++ b/libs/eigen/.clang-format @@ -0,0 +1,12 @@ +--- +Language: Cpp +BasedOnStyle: Google +ColumnLimit: 120 +SortIncludes: false +AttributeMacros: +- EIGEN_STRONG_INLINE +- EIGEN_ALWAYS_INLINE +- EIGEN_DEVICE_FUNC +- EIGEN_DONT_INLINE +- EIGEN_DEPRECATED +- EIGEN_UNUSED diff --git a/libs/eigen/CMakeLists.txt b/libs/eigen/CMakeLists.txt index f3e69b8..a57caee 100644 --- a/libs/eigen/CMakeLists.txt +++ b/libs/eigen/CMakeLists.txt @@ -1,8 +1,35 @@ # cmake_minimum_require must be the first command of the file -cmake_minimum_required(VERSION 3.5.0) +cmake_minimum_required(VERSION 3.10.0) + +# NOTE Remove setting the policy once the minimum required CMake version is +# increased to at least 3.15. Retain enabling the export to package registry. +if (POLICY CMP0090) + # The export command does not populate package registry by default + cmake_policy (SET CMP0090 NEW) + + # Unless otherwise specified, always export to package registry to ensure + # backwards compatibility. + if (NOT DEFINED CMAKE_EXPORT_PACKAGE_REGISTRY) + set (CMAKE_EXPORT_PACKAGE_REGISTRY ON) + endif (NOT DEFINED CMAKE_EXPORT_PACKAGE_REGISTRY) +endif (POLICY CMP0090) project(Eigen3) +# Remove this block after bumping CMake to v3.21.0 +# PROJECT_IS_TOP_LEVEL is defined then by default +if(CMAKE_VERSION VERSION_LESS 3.21.0) + if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(PROJECT_IS_TOP_LEVEL TRUE) + else() + set(PROJECT_IS_TOP_LEVEL FALSE) + endif() +endif() + +set(CMAKE_CXX_STANDARD 14 CACHE STRING "Default C++ standard") +set(CMAKE_CXX_STANDARD_REQUIRED ON CACHE BOOL "Require C++ standard") +set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Allow C++ extensions") + # guard against in-source builds if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) @@ -23,7 +50,7 @@ endif() ############################################################################# -# retrieve version information # +# retrieve version information # ############################################################################# # automatically parse the version number @@ -61,10 +88,6 @@ include(CMakeDependentOption) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) - -option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF) - - macro(ei_add_cxx_compiler_flag FLAG) string(REGEX REPLACE "-" "" SFLAG1 ${FLAG}) string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1}) @@ -74,20 +97,6 @@ macro(ei_add_cxx_compiler_flag FLAG) endif() endmacro() -check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) - -if(EIGEN_TEST_CXX11) - set(CMAKE_CXX_STANDARD 11) - set(CMAKE_CXX_EXTENSIONS OFF) - if(EIGEN_COMPILER_SUPPORT_CPP11) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - endif() -else() - #set(CMAKE_CXX_STANDARD 03) - #set(CMAKE_CXX_EXTENSIONS OFF) - ei_add_cxx_compiler_flag("-std=c++03") -endif() - # Determine if we should build shared libraries on this platform. 
get_cmake_property(EIGEN_BUILD_SHARED_LIBS TARGET_SUPPORTS_SHARED_LIBS) @@ -100,6 +109,8 @@ find_package(StandardMathLibrary) set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.") set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.") +# convert space separated argument into CMake lists for downstream consumption +separate_arguments(EIGEN_TEST_CUSTOM_CXX_FLAGS NATIVE_COMMAND ${EIGEN_TEST_CUSTOM_CXX_FLAGS}) set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "") @@ -109,13 +120,11 @@ if(NOT STANDARD_MATH_LIBRARY_FOUND) "Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.") else() - if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}") else() set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}") endif() - endif() if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) @@ -125,6 +134,7 @@ else() endif() option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) +option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF) # Disable pkgconfig only for native Windows builds if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) @@ -183,18 +193,6 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor - - # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails - # Moreover we should not set both -strict-ansi and -ansi - check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI) - ei_add_cxx_compiler_flag("-Qunused-arguments") # disable clang warning: argument unused during compilation: '-ansi' - - if(COMPILER_SUPPORT_STRICTANSI) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi") - else() - ei_add_cxx_compiler_flag("-ansi") - endif() - if(ANDROID_NDK) ei_add_cxx_compiler_flag("-pie") ei_add_cxx_compiler_flag("-fPIE") @@ -253,20 +251,20 @@ if(NOT MSVC) option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF) if(EIGEN_TEST_AVX512) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma") - if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fabi-version=6") - endif() message(STATUS "Enabling AVX512 in tests/examples") endif() option(EIGEN_TEST_AVX512DQ "Enable/Disable AVX512DQ in tests/examples" OFF) if(EIGEN_TEST_AVX512DQ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512dq") - if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fabi-version=6") - endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512dq -mfma") message(STATUS "Enabling AVX512DQ in tests/examples") endif() + + option(EIGEN_TEST_AVX512FP16 "Enable/Disable AVX512-FP16 in tests/examples" OFF) + if(EIGEN_TEST_AVX512FP16) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma -mavx512vl -mavx512fp16") + message(STATUS "Enabling AVX512-FP16 in tests/examples") + endif() option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF) if(EIGEN_TEST_F16C) @@ -369,11 +367,19 @@ else() endif() option(EIGEN_TEST_FMA "Enable/Disable FMA/AVX2 in tests/examples" OFF) - if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) + option(EIGEN_TEST_AVX2 "Enable/Disable FMA/AVX2 in tests/examples" 
OFF) + if((EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) OR EIGEN_TEST_AVX2) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") message(STATUS "Enabling FMA/AVX2 in tests/examples") endif() + option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF) + option(EIGEN_TEST_AVX512DQ "Enable/Disable AVX512DQ in tests/examples" OFF) + if(EIGEN_TEST_AVX512 OR EIGEN_TEST_AVX512DQ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX512") + message(STATUS "Enabling AVX512 in tests/examples") + endif() + endif() option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF) @@ -416,7 +422,8 @@ if(EIGEN_TEST_NO_EXCEPTIONS) message(STATUS "Disabling exceptions in tests/examples") endif() -set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code") +set(EIGEN_CUDA_CXX_FLAGS "" CACHE STRING "Additional flags to pass to the cuda compiler.") +set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture(s) to target when compiling CUDA code") include_directories(${CMAKE_CURRENT_SOURCE_DIR}) @@ -450,17 +457,6 @@ foreach(var INCLUDE_INSTALL_DIR CMAKEPACKAGE_INSTALL_DIR PKGCONFIG_INSTALL_DIR) endif() endforeach() -# similar to set_target_properties but append the property instead of overwriting it -macro(ei_add_target_property target prop value) - - get_target_property(previous ${target} ${prop}) - # if the property wasn't previously set, ${previous} is now "previous-NOTFOUND" which cmake allows catching with plain if() - if(NOT previous) - set(previous "") - endif() - set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}") -endmacro() - install(FILES signature_of_eigen3_matrix_library DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel @@ -482,8 +478,9 @@ if(EIGEN_BUILD_DOC) endif() -option(BUILD_TESTING "Enable creation of Eigen tests." ON) -if(BUILD_TESTING) +cmake_dependent_option(BUILD_TESTING "Enable creation of tests." ON "PROJECT_IS_TOP_LEVEL" OFF) +option(EIGEN_BUILD_TESTING "Enable creation of Eigen tests." 
${BUILD_TESTING}) +if(EIGEN_BUILD_TESTING) include(EigenConfigureTesting) if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) @@ -495,6 +492,9 @@ if(BUILD_TESTING) add_subdirectory(failtest) endif() +include(CMakeDetermineFortranCompiler) +option(EIGEN_BUILD_BLAS "Toggles the building of the Eigen Blas library" ${CMAKE_Fortran_COMPILER}) +option(EIGEN_BUILD_LAPACK "Toggles the building of the included Eigen LAPACK library" ${CMAKE_Fortran_COMPILER}) if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) add_subdirectory(blas) add_subdirectory(lapack) @@ -545,13 +545,30 @@ if(EIGEN_BUILD_BTL) add_subdirectory(bench/btl EXCLUDE_FROM_ALL) endif() -if(NOT WIN32) +find_package(CLANG_FORMAT 9 EXACT) +if(CLANG_FORMAT_FOUND) +set(FORMAT_SOURCES) +list(APPEND FORMAT_SUBDIRS blas bench demos "doc" Eigen include lapack scripts share unsupported test failtest) +foreach(DIR ${FORMAT_SUBDIRS}) + set(ABS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/${DIR}) + file(GLOB_RECURSE ${DIR}_SOURCES ${ABS_DIR}/*.cc ${ABS_DIR}/*.h ${ABS_DIR}/*.cpp ${ABS_DIR}/*.hpp ${ABS_DIR}/*.c) + list(APPEND FORMAT_SOURCES ${${DIR}_SOURCES}) + endforeach() + file(GLOB FORMAT_SOURCES_WITHOUTENDING LIST_DIRECTORIES false ${CMAKE_CURRENT_SOURCE_DIR}/Eigen/* ${CMAKE_CURRENT_SOURCE_DIR}/Eigen/CXX11/* ${CMAKE_CURRENT_SOURCE_DIR}/unsupported/Eigen/* ${CMAKE_CURRENT_SOURCE_DIR}/unsupported/Eigen/CXX11/*) + list(FILTER FORMAT_SOURCES_WITHOUTENDING EXCLUDE REGEX ".*.txt$") + list (APPEND FORMAT_SOURCES ${FORMAT_SOURCES_WITHOUTENDING}) + add_custom_target(format + COMMAND ${CLANG_FORMAT_EXECUTABLE} -i -style=file ${FORMAT_SOURCES} + DEPENDS ${FORMAT_SOURCES}) +endif() + +if(NOT WIN32 AND EIGEN_BUILD_SPBENCH) add_subdirectory(bench/spbench EXCLUDE_FROM_ALL) endif() configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY) -if(BUILD_TESTING) +if(EIGEN_BUILD_TESTING) ei_testing_print_summary() endif() @@ -559,49 +576,49 @@ message(STATUS "") message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}") message(STATUS "") -string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower) -if(cmake_generator_tolower MATCHES "makefile") - message(STATUS "Available targets (use: make TARGET):") -else() - message(STATUS "Available targets (use: cmake --build . --target TARGET):") +if(PROJECT_IS_TOP_LEVEL) + string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower) + if(cmake_generator_tolower MATCHES "makefile") + message(STATUS "Available targets (use: make TARGET):") + else() + message(STATUS "Available targets (use: cmake --build . --target TARGET):") + endif() + message(STATUS "---------+--------------------------------------------------------------") + message(STATUS "Target | Description") + message(STATUS "---------+--------------------------------------------------------------") + message(STATUS "install | Install Eigen. Headers will be installed to:") + message(STATUS " | <$CMAKE_INSTALL_PREFIX>/<$INCLUDE_INSTALL_DIR>") + message(STATUS " | Using the following values:") + message(STATUS " | CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") + message(STATUS " | INCLUDE_INSTALL_DIR: ${INCLUDE_INSTALL_DIR}") + message(STATUS " | Change the install location of Eigen headers using:") + message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourprefix") + message(STATUS " | Or:") + message(STATUS " | cmake . -DINCLUDE_INSTALL_DIR=yourdir") + message(STATUS "doc | Generate the API documentation, requires Doxygen & LaTeX") + if(EIGEN_BUILD_TESTING) + message(STATUS "check | Build and run the unit-tests. 
Read this page:") + message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") + endif() + if(CLANG_FORMAT_FOUND) + message(STATUS "format | Formats the source code according to .clang-format file") + endif() + message(STATUS "blas | Build BLAS library (not the same thing as Eigen)") + message(STATUS "uninstall| Remove files installed by the install target") + message(STATUS "---------+--------------------------------------------------------------") + message(STATUS "") endif() -message(STATUS "---------+--------------------------------------------------------------") -message(STATUS "Target | Description") -message(STATUS "---------+--------------------------------------------------------------") -message(STATUS "install | Install Eigen. Headers will be installed to:") -message(STATUS " | /") -message(STATUS " | Using the following values:") -message(STATUS " | CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") -message(STATUS " | INCLUDE_INSTALL_DIR: ${INCLUDE_INSTALL_DIR}") -message(STATUS " | Change the install location of Eigen headers using:") -message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourprefix") -message(STATUS " | Or:") -message(STATUS " | cmake . -DINCLUDE_INSTALL_DIR=yourdir") -message(STATUS "doc | Generate the API documentation, requires Doxygen & LaTeX") -if(BUILD_TESTING) - message(STATUS "check | Build and run the unit-tests. Read this page:") - message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") -endif() -message(STATUS "blas | Build BLAS library (not the same thing as Eigen)") -message(STATUS "uninstall| Remove files installed by the install target") -message(STATUS "---------+--------------------------------------------------------------") -message(STATUS "") - set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} ) set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} ) set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} ) set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} ) -set ( EIGEN_DEFINITIONS "") -set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" ) -set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} ) include (CMakePackageConfigHelpers) # Imported target support add_library (eigen INTERFACE) add_library (Eigen3::Eigen ALIAS eigen) -target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS}) target_include_directories (eigen INTERFACE $ $ @@ -612,23 +629,35 @@ set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen) install (TARGETS eigen EXPORT Eigen3Targets) +option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ON) +if(EIGEN_BUILD_CMAKE_PACKAGE) configure_package_config_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake - PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} + NO_SET_AND_CHECK_MACRO # Eigen does not provide legacy style defines NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components ) -# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does -# not depend on architecture specific settings or libraries. More -# specifically, an Eigen3Config.cmake generated from a 64 bit target can be -# used for 32 bit targets as well (and vice versa). 
-set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P}) -unset (CMAKE_SIZEOF_VOID_P) -write_basic_package_version_file (Eigen3ConfigVersion.cmake - VERSION ${EIGEN_VERSION_NUMBER} - COMPATIBILITY SameMajorVersion) -set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P}) + +# NOTE Remove the first code path once the minimum required CMake version is +# bumped to 3.14 or above. +if (CMAKE_VERSION VERSION_LESS 3.14) + # Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does + # not depend on architecture specific settings or libraries. More + # specifically, an Eigen3Config.cmake generated from a 64 bit target can be + # used for 32 bit targets as well (and vice versa). + set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P}) + unset (CMAKE_SIZEOF_VOID_P) + write_basic_package_version_file (Eigen3ConfigVersion.cmake + VERSION ${EIGEN_VERSION_NUMBER} + COMPATIBILITY SameMajorVersion) + set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P}) +else (CMAKE_VERSION VERSION_LESS 3.14) + write_basic_package_version_file (Eigen3ConfigVersion.cmake + VERSION ${EIGEN_VERSION_NUMBER} + COMPATIBILITY SameMajorVersion + ARCH_INDEPENDENT) +endif (CMAKE_VERSION VERSION_LESS 3.14) # The Eigen target will be located in the Eigen3 namespace. Other CMake # targets can refer to it using Eigen3::Eigen. @@ -639,14 +668,16 @@ export (PACKAGE Eigen3) install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake - ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake - ${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake - DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} ) +install (FILES ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake + DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}) # Add uninstall target -add_custom_target ( uninstall - COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake) +if(NOT TARGET uninstall) + add_custom_target ( uninstall + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake) +endif() +endif() if (EIGEN_SPLIT_TESTSUITE) ei_split_testsuite("${EIGEN_SPLIT_TESTSUITE}") diff --git a/libs/eigen/COPYING.GPL b/libs/eigen/COPYING.GPL deleted file mode 100644 index 94a9ed0..0000000 --- a/libs/eigen/COPYING.GPL +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. 
- - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. 
This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. 
- - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. 
- - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. 
- - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. 
If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. 
Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/licenses/why-not-lgpl.html>. diff --git a/libs/eigen/COPYING.MPL2 b/libs/eigen/COPYING.MPL2 index 14e2f77..ee6256c 100644 --- a/libs/eigen/COPYING.MPL2 +++ b/libs/eigen/COPYING.MPL2 @@ -357,7 +357,7 @@ Exhibit A - Source Code Form License Notice This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. + file, You can obtain one at https://mozilla.org/MPL/2.0/. 
If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE diff --git a/libs/eigen/Eigen/AccelerateSupport b/libs/eigen/Eigen/AccelerateSupport new file mode 100644 index 0000000..8cee7ac --- /dev/null +++ b/libs/eigen/Eigen/AccelerateSupport @@ -0,0 +1,50 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ACCELERATESUPPORT_MODULE_H +#define EIGEN_ACCELERATESUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \ingroup Support_modules + * \defgroup AccelerateSupport_Module AccelerateSupport module + * + * This module provides an interface to the Apple Accelerate library. + * It provides the seven following main factorization classes: + * - class AccelerateLLT: a Cholesky (LL^T) factorization. + * - class AccelerateLDLT: the default LDL^T factorization. + * - class AccelerateLDLTUnpivoted: a Cholesky-like LDL^T factorization with only 1x1 pivots and no pivoting + * - class AccelerateLDLTSBK: an LDL^T factorization with Supernode Bunch-Kaufman and static pivoting + * - class AccelerateLDLTTPP: an LDL^T factorization with full threshold partial pivoting + * - class AccelerateQR: a QR factorization + * - class AccelerateCholeskyAtA: a QR factorization without storing Q (equivalent to A^TA = R^T R) + * + * \code + * #include <Eigen/AccelerateSupport> + * \endcode + * + * In order to use this module, the Accelerate headers must be accessible from + * the include paths, and your binary must be linked to the Accelerate framework. + * The Accelerate library is only available on Apple hardware. + * + * Note that many of the algorithms can be influenced by the UpLo template + * argument. All matrices are assumed to be symmetric. For example, the following + * creates an LDLT factorization where your matrix is symmetric (implicit) and + * uses the lower triangle: + * + * \code + * AccelerateLDLT<SparseMatrix<float>, Lower> ldlt; + * \endcode + */ + +#include "src/AccelerateSupport/AccelerateSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ACCELERATESUPPORT_MODULE_H diff --git a/libs/eigen/Eigen/Cholesky b/libs/eigen/Eigen/Cholesky index a318ceb..2c686f1 100644 --- a/libs/eigen/Eigen/Cholesky +++ b/libs/eigen/Eigen/Cholesky @@ -32,11 +32,7 @@ #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE -#ifdef EIGEN_USE_MKL -#include "mkl_lapacke.h" -#else -#include "src/misc/lapacke.h" -#endif +#include "src/misc/lapacke_helpers.h" #include "src/Cholesky/LLT_LAPACKE.h" #endif diff --git a/libs/eigen/Eigen/CholmodSupport b/libs/eigen/Eigen/CholmodSupport index bed8924..1037bd5 100644 --- a/libs/eigen/Eigen/CholmodSupport +++ b/libs/eigen/Eigen/CholmodSupport @@ -22,7 +22,7 @@ extern "C" { * This module provides an interface to the Cholmod library which is part of the suitesparse package. * It provides the two following main factorization classes: * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. - * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial). 
+ * - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial). * * For the sake of completeness, this module also propose the two following classes: * - class CholmodSimplicialLLT diff --git a/libs/eigen/Eigen/Core b/libs/eigen/Eigen/Core index 5921e15..623d735 100644 --- a/libs/eigen/Eigen/Core +++ b/libs/eigen/Eigen/Core @@ -8,8 +8,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CORE_H -#define EIGEN_CORE_H +#ifndef EIGEN_CORE_MODULE_H +#define EIGEN_CORE_MODULE_H // first thing Eigen does: stop the compiler from reporting useless warnings. #include "src/Core/util/DisableStupidWarnings.h" @@ -36,7 +36,7 @@ // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. -#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) && EIGEN_GNUC_AT_MOST(5,5) +#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_MOST(5,5) #pragma GCC optimize ("-fno-ipa-cp-clone") #endif @@ -67,6 +67,7 @@ #endif #ifdef EIGEN_HAS_OPENMP +#include <atomic> #include <omp.h> #endif @@ -83,8 +84,8 @@ #include <cmath> #include <cassert> #include <functional> -#include <sstream> #ifndef EIGEN_NO_IO + #include <sstream> #include <iosfwd> #endif #include <cstring> @@ -94,14 +95,10 @@ // for min/max: #include <algorithm> -#if EIGEN_HAS_CXX11 #include <array> -#endif // for std::is_nothrow_move_assignable -#ifdef EIGEN_INCLUDE_TYPE_TRAITS #include <type_traits> -#endif // for outputting debug info #ifdef EIGEN_DEBUG_ASSIGN @@ -109,7 +106,8 @@ #endif // required for __cpuid, needs to be included after cmath -#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE +// also required for _BitScanReverse on Windows on ARM +#if EIGEN_COMP_MSVC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM64) && !EIGEN_OS_WINCE #include <intrin.h> #endif @@ -165,6 +163,7 @@ using std::ptrdiff_t; #include "src/Core/util/XprHelper.h" #include "src/Core/util/Memory.h" #include "src/Core/util/IntegralConstant.h" +#include "src/Core/util/Serializer.h" #include "src/Core/util/SymbolicIndex.h" #include "src/Core/NumTraits.h" @@ -179,6 +178,9 @@ #include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h" #if defined EIGEN_VECTORIZE_AVX512 + #if defined EIGEN_VECTORIZE_AVX512FP16 + #include "src/Core/arch/AVX512/PacketMathFP16.h" + #endif #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/TypeCasting.h" #include "src/Core/arch/SSE/Complex.h" @@ -191,6 +193,7 @@ #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX512/MathFunctions.h" + #include "src/Core/arch/AVX512/TrsmKernel.h" #elif defined EIGEN_VECTORIZE_AVX // Use AVX for floats and doubles, SSE for integers #include "src/Core/arch/SSE/PacketMath.h" @@ -256,10 +259,14 @@ #include "src/Core/functors/StlFunctors.h" #include "src/Core/functors/AssignmentFunctors.h" -// Specialized functors to enable the processing of complex numbers -// on CUDA devices -#ifdef EIGEN_CUDACC -#include "src/Core/arch/CUDA/Complex.h" +// Specialized functors for GPU. +#ifdef EIGEN_GPUCC +#include "src/Core/arch/GPU/Complex.h" +#endif + +// Specializations of vectorized activation functions for NEON. 
+#ifdef EIGEN_VECTORIZE_NEON +#include "src/Core/arch/NEON/UnaryFunctors.h" #endif #include "src/Core/util/IndexedViewHelper.h" @@ -314,6 +321,7 @@ using std::ptrdiff_t; #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" #include "src/Core/DiagonalProduct.h" +#include "src/Core/SkewSymmetricMatrix3.h" #include "src/Core/Redux.h" #include "src/Core/Visitor.h" #include "src/Core/Fuzzy.h" @@ -346,12 +354,16 @@ using std::ptrdiff_t; #include "src/Core/CoreIterators.h" #include "src/Core/ConditionEstimator.h" -#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) +#if defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/MatrixProduct.h" #elif defined EIGEN_VECTORIZE_NEON #include "src/Core/arch/NEON/GeneralBlockPanelKernel.h" #endif +#if defined(EIGEN_VECTORIZE_AVX512) + #include "src/Core/arch/AVX512/GemmKernel.h" +#endif + #include "src/Core/BooleanRedux.h" #include "src/Core/Select.h" #include "src/Core/VectorwiseOp.h" @@ -381,4 +393,4 @@ using std::ptrdiff_t; #include "src/Core/util/ReenableStupidWarnings.h" -#endif // EIGEN_CORE_H +#endif // EIGEN_CORE_MODULE_H diff --git a/libs/eigen/Eigen/IterativeLinearSolvers b/libs/eigen/Eigen/IterativeLinearSolvers index 957d575..26a0560 100644 --- a/libs/eigen/Eigen/IterativeLinearSolvers +++ b/libs/eigen/Eigen/IterativeLinearSolvers @@ -27,7 +27,7 @@ * - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices. * - IncompleteLUT - incomplete LU factorization with dual thresholding * - * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. + * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport, AccelerateSupport. 
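For orientation, here is a minimal sketch of the solver API the IterativeLinearSolvers doc above refers to, using ConjugateGradient on a hand-built SPD tridiagonal system. The matrix values and sizes are illustrative only, not taken from the diff:

    #include <Eigen/Sparse>
    #include <iostream>

    int main() {
      const int n = 100;
      Eigen::SparseMatrix<double> A(n, n);
      for (int i = 0; i < n; ++i) {
        A.insert(i, i) = 2.0;  // diagonal of a discrete Laplacian
        if (i + 1 < n) { A.insert(i, i + 1) = -1.0; A.insert(i + 1, i) = -1.0; }
      }
      A.makeCompressed();
      Eigen::VectorXd b = Eigen::VectorXd::Ones(n);

      // Same compute()/solve() interface as the direct modules listed above.
      Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower | Eigen::Upper> cg(A);
      Eigen::VectorXd x = cg.solve(b);
      std::cout << "iterations: " << cg.iterations() << ", error: " << cg.error() << "\n";
    }

Swapping in one of the direct decompositions (SimplicialLLT, CholmodSupernodalLLT, AccelerateLLT, ...) only changes the solver type; the compute/solve calls stay the same.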
* \code #include <Eigen/IterativeLinearSolvers> diff --git a/libs/eigen/Eigen/LU b/libs/eigen/Eigen/LU index 1236ceb..b7f9a8a 100644 --- a/libs/eigen/Eigen/LU +++ b/libs/eigen/Eigen/LU @@ -28,11 +28,7 @@ #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE -#ifdef EIGEN_USE_MKL -#include "mkl_lapacke.h" -#else -#include "src/misc/lapacke.h" -#endif +#include "src/misc/lapacke_helpers.h" #include "src/LU/PartialPivLU_LAPACKE.h" #endif #include "src/LU/Determinant.h" diff --git a/libs/eigen/Eigen/QR b/libs/eigen/Eigen/QR index 8465b62..1f6c22e 100644 --- a/libs/eigen/Eigen/QR +++ b/libs/eigen/Eigen/QR @@ -36,11 +36,7 @@ #include "src/QR/ColPivHouseholderQR.h" #include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE -#ifdef EIGEN_USE_MKL -#include "mkl_lapacke.h" -#else -#include "src/misc/lapacke.h" -#endif +#include "src/misc/lapacke_helpers.h" #include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h" #endif diff --git a/libs/eigen/Eigen/SPQRSupport b/libs/eigen/Eigen/SPQRSupport index f70390c..33c3370 100644 --- a/libs/eigen/Eigen/SPQRSupport +++ b/libs/eigen/Eigen/SPQRSupport @@ -28,7 +28,7 @@ * */ -#include "src/CholmodSupport/CholmodSupport.h" +#include "Eigen/CholmodSupport" #include "src/SPQRSupport/SuiteSparseQRSupport.h" #endif diff --git a/libs/eigen/Eigen/SVD b/libs/eigen/Eigen/SVD index 3451794..8241c73 100644 --- a/libs/eigen/Eigen/SVD +++ b/libs/eigen/Eigen/SVD @@ -36,14 +36,17 @@ #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" #include "src/SVD/BDCSVD.h" -#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) +#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_MKL #include "mkl_lapacke.h" #else #include "src/misc/lapacke.h" #endif +#ifndef EIGEN_USE_LAPACKE_STRICT #include "src/SVD/JacobiSVD_LAPACKE.h" #endif +#include "src/SVD/BDCSVD_LAPACKE.h" +#endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/libs/eigen/Eigen/SparseCore b/libs/eigen/Eigen/SparseCore index 76966c4..b2db46b 100644 --- a/libs/eigen/Eigen/SparseCore +++ b/libs/eigen/Eigen/SparseCore @@ -41,7 +41,6 @@ #include "src/SparseCore/SparseCompressedBase.h" #include "src/SparseCore/SparseMatrix.h" #include "src/SparseCore/SparseMap.h" -#include "src/SparseCore/MappedSparseMatrix.h" #include "src/SparseCore/SparseVector.h" #include "src/SparseCore/SparseRef.h" #include "src/SparseCore/SparseCwiseUnaryOp.h" diff --git a/libs/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h b/libs/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h new file mode 100644 index 0000000..0417688 --- /dev/null +++ b/libs/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h @@ -0,0 +1,421 @@ +#ifndef EIGEN_ACCELERATESUPPORT_H +#define EIGEN_ACCELERATESUPPORT_H + +#include <Accelerate/Accelerate.h> + +#include <Eigen/SparseCore> + +namespace Eigen { + +template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_> +class AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \class AccelerateLLT + * \brief A direct Cholesky (LLT) factorization and solver based on Accelerate + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
+ * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLLT + */ +template +using AccelerateLLT = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \class AccelerateLDLT + * \brief The default Cholesky (LDLT) factorization and solver based on Accelerate + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLT + */ +template +using AccelerateLDLT = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \class AccelerateLDLTUnpivoted + * \brief A direct Cholesky-like LDL^T factorization and solver based on Accelerate with only 1x1 pivots and no pivoting + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTUnpivoted + */ +template +using AccelerateLDLTUnpivoted = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \class AccelerateLDLTSBK + * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with Supernode Bunch-Kaufman and static pivoting + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTSBK + */ +template +using AccelerateLDLTSBK = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \class AccelerateLDLTTPP + * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with full threshold partial pivoting + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. 
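The aliases above only differ in the factorization kind they request from Accelerate. As a rough usage sketch, assuming the documented defaults (UpLo_ = Lower, matrix treated as symmetric) and a double-precision matrix chosen for illustration:

    #include <Eigen/AccelerateSupport>
    #include <Eigen/SparseCore>

    using SpMat = Eigen::SparseMatrix<double>;

    // Supernode Bunch-Kaufman with static pivoting: a robustness/speed middle ground.
    Eigen::VectorXd solveWithSBK(const SpMat& A, const Eigen::VectorXd& b) {
      Eigen::AccelerateLDLTSBK<SpMat> ldlt(A);  // lower triangle read, matrix assumed symmetric
      return ldlt.solve(b);
    }

Picking AccelerateLDLTTPP instead trades speed for full threshold partial pivoting; the call sites are unchanged.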
+ * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTTPP + */ +template +using AccelerateLDLTTPP = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \class AccelerateQR + * \brief A QR factorization and solver based on Accelerate + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateQR + */ +template +using AccelerateQR = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \class AccelerateCholeskyAtA + * \brief A QR factorization and solver based on Accelerate without storing Q (equivalent to A^TA = R^T R) + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateCholeskyAtA + */ +template +using AccelerateCholeskyAtA = AccelerateImpl; + +namespace internal { +template +struct AccelFactorizationDeleter { + void operator()(T* sym) { + if (sym) { + SparseCleanup(*sym); + delete sym; + sym = nullptr; + } + } +}; + +template +struct SparseTypesTraitBase { + typedef DenseVecT AccelDenseVector; + typedef DenseMatT AccelDenseMatrix; + typedef SparseMatT AccelSparseMatrix; + + typedef SparseOpaqueSymbolicFactorization SymbolicFactorization; + typedef NumFactT NumericFactorization; + + typedef AccelFactorizationDeleter SymbolicFactorizationDeleter; + typedef AccelFactorizationDeleter NumericFactorizationDeleter; +}; + +template +struct SparseTypesTrait {}; + +template <> +struct SparseTypesTrait : SparseTypesTraitBase {}; + +template <> +struct SparseTypesTrait + : SparseTypesTraitBase { +}; + +} // end namespace internal + +template +class AccelerateImpl : public SparseSolverBase > { + protected: + using Base = SparseSolverBase; + using Base::derived; + using Base::m_isInitialized; + + public: + using Base::_solve_impl; + + typedef MatrixType_ MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { ColsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic }; + enum { UpLo = UpLo_ }; + + using AccelDenseVector = typename internal::SparseTypesTrait::AccelDenseVector; + using AccelDenseMatrix = typename internal::SparseTypesTrait::AccelDenseMatrix; + using AccelSparseMatrix = typename internal::SparseTypesTrait::AccelSparseMatrix; + using SymbolicFactorization = typename internal::SparseTypesTrait::SymbolicFactorization; + using NumericFactorization = typename internal::SparseTypesTrait::NumericFactorization; + using SymbolicFactorizationDeleter = typename internal::SparseTypesTrait::SymbolicFactorizationDeleter; + using NumericFactorizationDeleter = typename internal::SparseTypesTrait::NumericFactorizationDeleter; + + AccelerateImpl() { + m_isInitialized = false; + + auto check_flag_set = [](int value, int flag) { return ((value & flag) == flag); }; + + if (check_flag_set(UpLo_, Symmetric)) { + m_sparseKind = SparseSymmetric; + m_triType = (UpLo_ & Lower) ? 
SparseLowerTriangle : SparseUpperTriangle; + } else if (check_flag_set(UpLo_, UnitLower)) { + m_sparseKind = SparseUnitTriangular; + m_triType = SparseLowerTriangle; + } else if (check_flag_set(UpLo_, UnitUpper)) { + m_sparseKind = SparseUnitTriangular; + m_triType = SparseUpperTriangle; + } else if (check_flag_set(UpLo_, StrictlyLower)) { + m_sparseKind = SparseTriangular; + m_triType = SparseLowerTriangle; + } else if (check_flag_set(UpLo_, StrictlyUpper)) { + m_sparseKind = SparseTriangular; + m_triType = SparseUpperTriangle; + } else if (check_flag_set(UpLo_, Lower)) { + m_sparseKind = SparseTriangular; + m_triType = SparseLowerTriangle; + } else if (check_flag_set(UpLo_, Upper)) { + m_sparseKind = SparseTriangular; + m_triType = SparseUpperTriangle; + } else { + m_sparseKind = SparseOrdinary; + m_triType = (UpLo_ & Lower) ? SparseLowerTriangle : SparseUpperTriangle; + } + + m_order = SparseOrderDefault; + } + + explicit AccelerateImpl(const MatrixType& matrix) : AccelerateImpl() { compute(matrix); } + + ~AccelerateImpl() {} + + inline Index cols() const { return m_nCols; } + inline Index rows() const { return m_nRows; } + + ComputationInfo info() const { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + void analyzePattern(const MatrixType& matrix); + + void factorize(const MatrixType& matrix); + + void compute(const MatrixType& matrix); + + template + void _solve_impl(const MatrixBase& b, MatrixBase& dest) const; + + /** Sets the ordering algorithm to use. */ + void setOrder(SparseOrder_t order) { m_order = order; } + + private: + template + void buildAccelSparseMatrix(const SparseMatrix& a, AccelSparseMatrix& A, std::vector& columnStarts) { + const Index nColumnsStarts = a.cols() + 1; + + columnStarts.resize(nColumnsStarts); + + for (Index i = 0; i < nColumnsStarts; i++) columnStarts[i] = a.outerIndexPtr()[i]; + + SparseAttributes_t attributes{}; + attributes.transpose = false; + attributes.triangle = m_triType; + attributes.kind = m_sparseKind; + + SparseMatrixStructure structure{}; + structure.attributes = attributes; + structure.rowCount = static_cast(a.rows()); + structure.columnCount = static_cast(a.cols()); + structure.blockSize = 1; + structure.columnStarts = columnStarts.data(); + structure.rowIndices = const_cast(a.innerIndexPtr()); + + A.structure = structure; + A.data = const_cast(a.valuePtr()); + } + + void doAnalysis(AccelSparseMatrix& A) { + m_numericFactorization.reset(nullptr); + + SparseSymbolicFactorOptions opts{}; + opts.control = SparseDefaultControl; + opts.orderMethod = m_order; + opts.order = nullptr; + opts.ignoreRowsAndColumns = nullptr; + opts.malloc = malloc; + opts.free = free; + opts.reportError = nullptr; + + m_symbolicFactorization.reset(new SymbolicFactorization(SparseFactor(Solver_, A.structure, opts))); + + SparseStatus_t status = m_symbolicFactorization->status; + + updateInfoStatus(status); + + if (status != SparseStatusOK) m_symbolicFactorization.reset(nullptr); + } + + void doFactorization(AccelSparseMatrix& A) { + SparseStatus_t status = SparseStatusReleased; + + if (m_symbolicFactorization) { + m_numericFactorization.reset(new NumericFactorization(SparseFactor(*m_symbolicFactorization, A))); + + status = m_numericFactorization->status; + + if (status != SparseStatusOK) m_numericFactorization.reset(nullptr); + } + + updateInfoStatus(status); + } + + protected: + void updateInfoStatus(SparseStatus_t status) const { + switch (status) { + case SparseStatusOK: + m_info = Success; + break; + case 
SparseFactorizationFailed: + case SparseMatrixIsSingular: + m_info = NumericalIssue; + break; + case SparseInternalError: + case SparseParameterError: + case SparseStatusReleased: + default: + m_info = InvalidInput; + break; + } + } + + mutable ComputationInfo m_info; + Index m_nRows, m_nCols; + std::unique_ptr m_symbolicFactorization; + std::unique_ptr m_numericFactorization; + SparseKind_t m_sparseKind; + SparseTriangle_t m_triType; + SparseOrder_t m_order; +}; + +/** Computes the symbolic and numeric decomposition of matrix \a a */ +template +void AccelerateImpl::compute(const MatrixType& a) { + if (EnforceSquare_) eigen_assert(a.rows() == a.cols()); + + m_nRows = a.rows(); + m_nCols = a.cols(); + + AccelSparseMatrix A{}; + std::vector columnStarts; + + buildAccelSparseMatrix(a, A, columnStarts); + + doAnalysis(A); + + if (m_symbolicFactorization) doFactorization(A); + + m_isInitialized = true; +} + +/** Performs a symbolic decomposition on the sparsity pattern of matrix \a a. + * + * This function is particularly useful when solving for several problems having the same structure. + * + * \sa factorize() + */ +template +void AccelerateImpl::analyzePattern(const MatrixType& a) { + if (EnforceSquare_) eigen_assert(a.rows() == a.cols()); + + m_nRows = a.rows(); + m_nCols = a.cols(); + + AccelSparseMatrix A{}; + std::vector columnStarts; + + buildAccelSparseMatrix(a, A, columnStarts); + + doAnalysis(A); + + m_isInitialized = true; +} + +/** Performs a numeric decomposition of matrix \a a. + * + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. + * + * \sa analyzePattern() + */ +template +void AccelerateImpl::factorize(const MatrixType& a) { + eigen_assert(m_symbolicFactorization && "You must first call analyzePattern()"); + eigen_assert(m_nRows == a.rows() && m_nCols == a.cols()); + + if (EnforceSquare_) eigen_assert(a.rows() == a.cols()); + + AccelSparseMatrix A{}; + std::vector columnStarts; + + buildAccelSparseMatrix(a, A, columnStarts); + + doFactorization(A); +} + +template +template +void AccelerateImpl::_solve_impl(const MatrixBase& b, + MatrixBase& x) const { + if (!m_numericFactorization) { + m_info = InvalidInput; + return; + } + + eigen_assert(m_nRows == b.rows()); + eigen_assert(((b.cols() == 1) || b.outerStride() == b.rows())); + + SparseStatus_t status = SparseStatusOK; + + Scalar* b_ptr = const_cast(b.derived().data()); + Scalar* x_ptr = const_cast(x.derived().data()); + + AccelDenseMatrix xmat{}; + xmat.attributes = SparseAttributes_t(); + xmat.columnCount = static_cast(x.cols()); + xmat.rowCount = static_cast(x.rows()); + xmat.columnStride = xmat.rowCount; + xmat.data = x_ptr; + + AccelDenseMatrix bmat{}; + bmat.attributes = SparseAttributes_t(); + bmat.columnCount = static_cast(b.cols()); + bmat.rowCount = static_cast(b.rows()); + bmat.columnStride = bmat.rowCount; + bmat.data = b_ptr; + + SparseSolve(*m_numericFactorization, bmat, xmat); + + updateInfoStatus(status); +} + +} // end namespace Eigen + +#endif // EIGEN_ACCELERATESUPPORT_H diff --git a/libs/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h b/libs/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h new file mode 100644 index 0000000..69bcff5 --- /dev/null +++ b/libs/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_ACCELERATESUPPORT_MODULE_H +#error "Please include Eigen/AccelerateSupport instead of including headers inside the src directory directly." 
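The analyzePattern()/factorize() split documented in this file pays off when several matrices share one sparsity pattern, since the symbolic step is done once. A sketch, with the solver alias and scalar type chosen for illustration:

    #include <Eigen/AccelerateSupport>
    #include <Eigen/SparseCore>

    using SpMat = Eigen::SparseMatrix<double>;

    void solveSequence(const SpMat& A0, const SpMat& A1, const Eigen::VectorXd& b) {
      Eigen::AccelerateLLT<SpMat> llt;
      llt.analyzePattern(A0);             // symbolic factorization, done once
      llt.factorize(A0);                  // numeric factorization of the first matrix
      Eigen::VectorXd x0 = llt.solve(b);
      llt.factorize(A1);                  // same pattern, new values: reuses the symbolic step
      Eigen::VectorXd x1 = llt.solve(b);
    }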
+#endif diff --git a/libs/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h b/libs/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h new file mode 100644 index 0000000..5de2b21 --- /dev/null +++ b/libs/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CHOLESKY_MODULE_H +#error "Please include Eigen/Cholesky instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/Cholesky/LDLT.h b/libs/eigen/Eigen/src/Cholesky/LDLT.h index 1013ca0..1d0369b 100644 --- a/libs/eigen/Eigen/src/Cholesky/LDLT.h +++ b/libs/eigen/Eigen/src/Cholesky/LDLT.h @@ -13,11 +13,13 @@ #ifndef EIGEN_LDLT_H #define EIGEN_LDLT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { - template struct traits > - : traits<_MatrixType> + template struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; @@ -37,8 +39,8 @@ namespace internal { * * \brief Robust Cholesky decomposition of a matrix with pivoting * - * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition - * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * \tparam MatrixType_ the type of the matrix of which to compute the LDL^T Cholesky decomposition + * \tparam UpLo_ the triangular part that will be used for the decomposition: Lower (default) or Upper. * The other triangular part won't be read. * * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite @@ -56,11 +58,11 @@ namespace internal { * * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT */ -template class LDLT - : public SolverBase > +template class LDLT + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; friend class SolverBase; @@ -68,7 +70,7 @@ template class LDLT enum { MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - UpLo = _UpLo + UpLo = UpLo_ }; typedef Matrix TmpMatrixType; @@ -244,7 +246,7 @@ template class LDLT * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: * \code x = decomposition.adjoint().solve(b) \endcode */ - const LDLT& adjoint() const { return *this; }; + const LDLT& adjoint() const { return *this; } EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); } EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); } @@ -270,10 +272,7 @@ template class LDLT protected: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) /** \internal * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. 
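For reference, a minimal dense-world sketch of the LDLT interface touched by this hunk, including the adjoint() pass-through it documents; sizes and values are illustrative:

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd M = Eigen::MatrixXd::Random(4, 4);
      Eigen::MatrixXd A = M * M.transpose();        // symmetric positive semidefinite
      Eigen::VectorXd b = Eigen::VectorXd::Ones(4);

      Eigen::LDLT<Eigen::MatrixXd> ldlt(A);
      Eigen::VectorXd x = ldlt.solve(b);
      Eigen::VectorXd y = ldlt.adjoint().solve(b);  // adjoint() returns *this, enabling generic code
      return ldlt.info() == Eigen::Success ? 0 : 1;
    }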
@@ -441,7 +440,7 @@ template<> struct ldlt_inplace // Update the terms of L Index rs = size-j-1; w.tail(rs) -= wj * mat.col(j).tail(rs); - if(gamma != 0) + if(!numext::is_exactly_zero(gamma)) mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs); } return true; @@ -494,12 +493,10 @@ template struct LDLT_Traits /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix */ -template +template template -LDLT& LDLT::compute(const EigenBase& a) +LDLT& LDLT::compute(const EigenBase& a) { - check_template_parameters(); - eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); @@ -510,7 +507,7 @@ LDLT& LDLT::compute(const EigenBase() + m_matrix.row(col).head(col).template lpNorm<1>(); else abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); @@ -534,9 +531,9 @@ LDLT& LDLT::compute(const EigenBase +template template -LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) +LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) { typedef typename TranspositionType::StorageIndex IndexType; const Index size = w.rows(); @@ -562,16 +559,16 @@ LDLT& LDLT::rankUpdate(const MatrixBase +template template -void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +void LDLT::_solve_impl(const RhsType &rhs, DstType &dst) const { _solve_impl_transposed(rhs, dst); } -template +template template -void LDLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +void LDLT::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { // dst = P b dst = m_transpositions * rhs; @@ -624,9 +621,9 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType * * \sa LDLT::solve(), MatrixBase::ldlt() */ -template +template template -bool LDLT::solveInPlace(MatrixBase &bAndX) const +bool LDLT::solveInPlace(MatrixBase &bAndX) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_matrix.rows() == bAndX.rows()); @@ -639,8 +636,8 @@ bool LDLT::solveInPlace(MatrixBase &bAndX) const /** \returns the matrix represented by the decomposition, * i.e., it returns the product: P^T L D L^* P. * This function is provided for debug purpose. */ -template -MatrixType LDLT::reconstructedMatrix() const +template +MatrixType LDLT::reconstructedMatrix() const { eigen_assert(m_isInitialized && "LDLT is not initialized."); const Index size = m_matrix.rows(); diff --git a/libs/eigen/Eigen/src/Cholesky/LLT.h b/libs/eigen/Eigen/src/Cholesky/LLT.h index 8c9b2b3..1443eac 100644 --- a/libs/eigen/Eigen/src/Cholesky/LLT.h +++ b/libs/eigen/Eigen/src/Cholesky/LLT.h @@ -10,12 +10,14 @@ #ifndef EIGEN_LLT_H #define EIGEN_LLT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal{ -template struct traits > - : traits<_MatrixType> +template struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; @@ -32,8 +34,8 @@ template struct LLT_Traits; * * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features * - * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition - * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. 
+ * \tparam MatrixType_ the type of the matrix of which we are computing the LL^T Cholesky decomposition + * \tparam UpLo_ the triangular part that will be used for the decomposition: Lower (default) or Upper. * The other triangular part won't be read. * * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite @@ -58,16 +60,16 @@ template struct LLT_Traits; * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. * - * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered. + * Note that during the decomposition, only the lower (or upper, as defined by UpLo_) triangular part of A is considered. * Therefore, the strict lower part does not have to store correct values. * * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ -template class LLT - : public SolverBase > +template class LLT + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; friend class SolverBase; @@ -79,7 +81,7 @@ template class LLT enum { PacketSize = internal::packet_traits::size, AlignmentMask = int(PacketSize)-1, - UpLo = _UpLo + UpLo = UpLo_ }; typedef internal::LLT_Traits Traits; @@ -199,7 +201,7 @@ template class LLT * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: * \code x = decomposition.adjoint().solve(b) \endcode */ - const LLT& adjoint() const EIGEN_NOEXCEPT { return *this; }; + const LLT& adjoint() const EIGEN_NOEXCEPT { return *this; } inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); } inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); } @@ -217,10 +219,7 @@ template class LLT protected: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) /** \internal * Used to compute and store L @@ -243,7 +242,7 @@ static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::ColXpr ColXpr; - typedef typename internal::remove_all::type ColXprCleaned; + typedef internal::remove_all_t ColXprCleaned; typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; typedef Matrix TempVectorType; typedef typename TempVectorType::SegmentReturnType TempVecSegment; @@ -298,7 +297,7 @@ static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const if(rs) { temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs); - if(gamma != 0) + if(!numext::is_exactly_zero(gamma)) mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*numext::conj(wj)/gamma)*temp.tail(rs); } } @@ -427,12 +426,10 @@ template struct LLT_Traits * Example: \include TutorialLinAlgComputeTwice.cpp * Output: \verbinclude TutorialLinAlgComputeTwice.out */ -template +template template -LLT& LLT::compute(const EigenBase& a) +LLT& LLT::compute(const EigenBase& a) { - check_template_parameters(); - eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); @@ -444,7 +441,7 @@ LLT& LLT::compute(const EigenBase // TODO move this code to SelfAdjointView for (Index col = 0; col < size; ++col) { RealScalar abs_col_sum; - if (_UpLo == Lower) + if (UpLo_ == Lower) abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + 
m_matrix.row(col).head(col).template lpNorm<1>(); else abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); @@ -464,9 +461,9 @@ LLT& LLT::compute(const EigenBase * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector * of same dimension. */ -template +template template -LLT<_MatrixType,_UpLo> & LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma) +LLT & LLT::rankUpdate(const VectorType& v, const RealScalar& sigma) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType); eigen_assert(v.size()==m_matrix.cols()); @@ -480,16 +477,16 @@ LLT<_MatrixType,_UpLo> & LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, } #ifndef EIGEN_PARSED_BY_DOXYGEN -template +template template -void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +void LLT::_solve_impl(const RhsType &rhs, DstType &dst) const { _solve_impl_transposed(rhs, dst); } -template +template template -void LLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +void LLT::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { dst = rhs; @@ -511,9 +508,9 @@ void LLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType * * \sa LLT::solve(), MatrixBase::llt() */ -template +template template -void LLT::solveInPlace(const MatrixBase &bAndX) const +void LLT::solveInPlace(const MatrixBase &bAndX) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==bAndX.rows()); @@ -524,8 +521,8 @@ void LLT::solveInPlace(const MatrixBase &bAndX) const /** \returns the matrix represented by the decomposition, * i.e., it returns the product: L L^*. * This function is provided for debug purpose. */ -template -MatrixType LLT::reconstructedMatrix() const +template +MatrixType LLT::reconstructedMatrix() const { eigen_assert(m_isInitialized && "LLT is not initialized."); return matrixL() * matrixL().adjoint().toDenseMatrix(); diff --git a/libs/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h b/libs/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h index bc6489e..62bc679 100644 --- a/libs/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +++ b/libs/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h @@ -33,64 +33,86 @@ #ifndef EIGEN_LLT_LAPACKE_H #define EIGEN_LLT_LAPACKE_H -namespace Eigen { +#include "./InternalHeaderCheck.h" + +namespace Eigen { namespace internal { -template struct lapacke_llt; +namespace lapacke_helpers { + // ------------------------------------------------------------------------------------------------------------------- + // Dispatch for rank update handling upper and lower parts + // ------------------------------------------------------------------------------------------------------------------- -#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \ -template<> struct lapacke_llt \ -{ \ - template \ - static inline Index potrf(MatrixType& m, char uplo) \ - { \ - lapack_int matrix_order; \ - lapack_int size, lda, info, StorageOrder; \ - EIGTYPE* a; \ - eigen_assert(m.rows()==m.cols()); \ - /* Set up parameters for ?potrf */ \ - size = convert_index(m.rows()); \ - StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - a = &(m.coeffRef(0,0)); \ - lda = convert_index(m.outerStride()); \ -\ - info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \ - info = (info==0) ? -1 : info>0 ? 
info-1 : size; \ - return info; \ - } \ -}; \ -template<> struct llt_inplace<EIGTYPE, Lower> \ -{ \ - template<typename MatrixType> \ - static Index blocked(MatrixType& m) \ - { \ - return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \ - } \ - template<typename MatrixType, typename VectorType> \ - static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ - { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ -}; \ -template<> struct llt_inplace<EIGTYPE, Upper> \ -{ \ - template<typename MatrixType> \ - static Index blocked(MatrixType& m) \ - { \ - return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \ - } \ - template<typename MatrixType, typename VectorType> \ - static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ - { \ - Transpose<MatrixType> matt(mat); \ - return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \ - } \ -}; + template<UpLoType Mode> + struct rank_update {}; -EIGEN_LAPACKE_LLT(double, double, d) -EIGEN_LAPACKE_LLT(float, float, s) -EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z) -EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c) + template<> + struct rank_update<Lower> { + template<typename MatrixType, typename VectorType> + static Index run(MatrixType &mat, const VectorType &vec, const typename MatrixType::RealScalar &sigma) { + return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); + } + }; + + template<> + struct rank_update<Upper> { + template<typename MatrixType, typename VectorType> + static Index run(MatrixType &mat, const VectorType &vec, const typename MatrixType::RealScalar &sigma) { + Transpose<MatrixType> matt(mat); + return Eigen::internal::llt_rank_update_lower(matt, vec.conjugate(), sigma); + } + }; + + // ------------------------------------------------------------------------------------------------------------------- + // Generic lapacke llt implementation that hands off to the dispatches above + // ------------------------------------------------------------------------------------------------------------------- + + template<typename Scalar, UpLoType Mode> + struct lapacke_llt { + template<typename MatrixType> + static Index blocked(MatrixType& m) + { + eigen_assert(m.rows() == m.cols()); + if(m.rows() == 0) { + return -1; + } + /* Set up parameters for ?potrf */ + lapack_int size = to_lapack(m.rows()); + lapack_int matrix_order = lapack_storage_of(m); + Scalar* a = &(m.coeffRef(0,0)); + lapack_int lda = to_lapack(m.outerStride()); + + lapack_int info = potrf(matrix_order, translate_mode<Mode>, size, to_lapack(a), lda ); + info = (info==0) ? -1 : info>0 ? info-1 : size; + return info; + } + + template<typename MatrixType, typename VectorType> + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) + { + return rank_update<Mode>::run(mat, vec, sigma); + } + }; +} +// end namespace lapacke_helpers + +/* + * Here, we just put the generic implementation from lapacke_llt into a full specialization of the llt_inplace + * type. By being a full specialization, the versions defined here thus get precedence over the generic implementation + * in LLT.h for double, float and complex double, complex float types.
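From the user's side nothing changes: defining EIGEN_USE_LAPACKE before including Eigen still routes the blocked Cholesky through LAPACK's ?potrf, now via the lapacke_helpers wrappers. A sketch; the exact build flags are an assumption (e.g. linking against -llapacke):

    // g++ -DEIGEN_USE_LAPACKE main.cpp -llapacke
    #define EIGEN_USE_LAPACKE
    #include <Eigen/Dense>

    int main() {
      const int n = 64;
      Eigen::MatrixXd M = Eigen::MatrixXd::Random(n, n);
      Eigen::MatrixXd A = M * M.transpose() + n * Eigen::MatrixXd::Identity(n, n); // SPD
      Eigen::LLT<Eigen::MatrixXd> llt(A);  // blocked path dispatches to LAPACKE dpotrf
      return llt.info() == Eigen::Success ? 0 : 1;
    }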
+ */ + +#define EIGEN_LAPACKE_LLT(EIGTYPE) \ +template<> struct llt_inplace : public lapacke_helpers::lapacke_llt {}; \ +template<> struct llt_inplace : public lapacke_helpers::lapacke_llt {}; + +EIGEN_LAPACKE_LLT(double) +EIGEN_LAPACKE_LLT(float) +EIGEN_LAPACKE_LLT(std::complex) +EIGEN_LAPACKE_LLT(std::complex) + +#undef EIGEN_LAPACKE_LLT } // end namespace internal diff --git a/libs/eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/libs/eigen/Eigen/src/CholmodSupport/CholmodSupport.h index adaf528..91c1cfc 100644 --- a/libs/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/libs/eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -10,6 +10,8 @@ #ifndef EIGEN_CHOLMODSUPPORT_H #define EIGEN_CHOLMODSUPPORT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -54,8 +56,8 @@ template<> struct cholmod_configure_matrix > { /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. * Note that the data are shared. */ -template -cholmod_sparse viewAsCholmod(Ref > mat) +template +cholmod_sparse viewAsCholmod(Ref > mat) { cholmod_sparse res; res.nzmax = mat.nonZeros(); @@ -80,11 +82,11 @@ cholmod_sparse viewAsCholmod(Ref > res.dtype = 0; res.stype = -1; - if (internal::is_same<_StorageIndex,int>::value) + if (internal::is_same::value) { res.itype = CHOLMOD_INT; } - else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value) + else if (internal::is_same::value) { res.itype = CHOLMOD_LONG; } @@ -94,39 +96,39 @@ cholmod_sparse viewAsCholmod(Ref > } // setup res.xtype - internal::cholmod_configure_matrix<_Scalar>::run(res); + internal::cholmod_configure_matrix::run(res); res.stype = 0; return res; } -template -const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat) +template +const cholmod_sparse viewAsCholmod(const SparseMatrix& mat) { - cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); return res; } -template -const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat) +template +const cholmod_sparse viewAsCholmod(const SparseVector& mat) { - cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); return res; } /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. * The data are not copied but shared. */ -template -cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +template +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) { - cholmod_sparse res = viewAsCholmod(Ref >(mat.matrix().const_cast_derived())); + cholmod_sparse res = viewAsCholmod(Ref >(mat.matrix().const_cast_derived())); if(UpLo==Upper) res.stype = 1; if(UpLo==Lower) res.stype = -1; // swap stype for rowmajor matrices (only works for real matrices) - EIGEN_STATIC_ASSERT((_Options & RowMajorBit) == 0 || NumTraits<_Scalar>::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - if(_Options & RowMajorBit) res.stype *=-1; + EIGEN_STATIC_ASSERT((Options_ & RowMajorBit) == 0 || NumTraits::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + if(Options_ & RowMajorBit) res.stype *=-1; return res; } @@ -155,9 +157,9 @@ cholmod_dense viewAsCholmod(MatrixBase& mat) /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. * The data are not copied but shared. 
*/ template -MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) +Map > viewAsEigen(cholmod_sparse& cm) { - return MappedSparseMatrix + return Map > (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); } @@ -167,11 +169,11 @@ namespace internal { // template specializations for int and long that call the correct cholmod method #define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ - template inline ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ + template inline ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ template<> inline ret cm_ ## name (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } #define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ - template inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ + template inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ template<> inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } EIGEN_CHOLMOD_SPECIALIZE0(int, start) @@ -183,14 +185,14 @@ EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) -template inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } +template inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } template<> inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } -template inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } +template inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } template<> inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } -template -inline int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } +template +inline int cm_factorize_p (cholmod_sparse* A, double beta[2], StorageIndex_* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } template<> inline int cm_factorize_p (cholmod_sparse* A, double beta[2], SuiteSparse_long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } @@ -210,7 +212,7 @@ enum CholmodMode { * \brief The base class for the direct Cholesky factorization of Cholmod * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT */ -template +template class CholmodBase : public SparseSolverBase { protected: @@ -218,8 +220,8 @@ class CholmodBase : public SparseSolverBase using Base::derived; using Base::m_isInitialized; public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; + typedef MatrixType_ MatrixType; + enum { UpLo = UpLo_ }; typedef typename MatrixType::Scalar Scalar; typedef 
typename MatrixType::RealScalar RealScalar; typedef MatrixType CholMatrixType; @@ -436,7 +438,7 @@ class CholmodBase : public SparseSolverBase if (m_cholmodFactor->is_ll) logDet *= 2.0; return logDet; - }; + } template void dumpMemory(Stream& /*s*/) @@ -461,8 +463,8 @@ class CholmodBase : public SparseSolverBase * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * * \implsparsesolverconcept @@ -473,15 +475,15 @@ class CholmodBase : public SparseSolverBase * * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT */ -template -class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> > +template +class CholmodSimplicialLLT : public CholmodBase > { - typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base; + typedef CholmodBase Base; using Base::m_cholmod; public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; CholmodSimplicialLLT() : Base() { init(); } @@ -512,8 +514,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * * \implsparsesolverconcept @@ -524,15 +526,15 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl * * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT */ -template -class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> > +template +class CholmodSimplicialLDLT : public CholmodBase > { - typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base; + typedef CholmodBase Base; using Base::m_cholmod; public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; CholmodSimplicialLDLT() : Base() { init(); } @@ -561,8 +563,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. 
* * \implsparsesolverconcept @@ -573,15 +575,15 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp * * \sa \ref TutorialSparseSolverConcept */ -template -class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> > +template +class CholmodSupernodalLLT : public CholmodBase > { - typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base; + typedef CholmodBase Base; using Base::m_cholmod; public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; CholmodSupernodalLLT() : Base() { init(); } @@ -612,8 +614,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * On the other hand, it does not provide access to the result of the factorization. * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * * \implsparsesolverconcept @@ -624,15 +626,15 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * * \sa \ref TutorialSparseSolverConcept */ -template -class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> > +template +class CholmodDecomposition : public CholmodBase > { - typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base; + typedef CholmodBase Base; using Base::m_cholmod; public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; CholmodDecomposition() : Base() { init(); } diff --git a/libs/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h b/libs/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h new file mode 100644 index 0000000..0fb3abc --- /dev/null +++ b/libs/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H +#error "Please include Eigen/CholmodSupport instead of including headers inside the src directory directly." 
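As a usage sketch for the Cholmod solver classes above, assuming CHOLMOD is installed and linked; the setMode() call and its enum value come from the CholmodMode machinery earlier in this file:

    #include <Eigen/CholmodSupport>
    #include <Eigen/SparseCore>

    using SpMat = Eigen::SparseMatrix<double>;

    Eigen::VectorXd solveWithCholmod(const SpMat& A, const Eigen::VectorXd& b) {
      Eigen::CholmodDecomposition<SpMat, Eigen::Lower> solver;
      solver.setMode(Eigen::CholmodSupernodalLLt);  // optional: the default lets Cholmod choose
      solver.compute(A);
      return solver.solve(b);
    }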
+#endif diff --git a/libs/eigen/Eigen/src/Core/ArithmeticSequence.h b/libs/eigen/Eigen/src/Core/ArithmeticSequence.h index b6200fa..81005c5 100644 --- a/libs/eigen/Eigen/src/Core/ArithmeticSequence.h +++ b/libs/eigen/Eigen/src/Core/ArithmeticSequence.h @@ -10,69 +10,18 @@ #ifndef EIGEN_ARITHMETIC_SEQUENCE_H #define EIGEN_ARITHMETIC_SEQUENCE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) -template struct aseq_negate {}; - -template<> struct aseq_negate { - typedef Index type; -}; - -template struct aseq_negate > { - typedef FixedInt<-N> type; -}; - -// Compilation error in the following case: -template<> struct aseq_negate > {}; - -template::value, - bool SizeIsSymbolic =symbolic::is_symbolic::value> -struct aseq_reverse_first_type { - typedef Index type; -}; - -template -struct aseq_reverse_first_type { - typedef symbolic::AddExpr > >, - symbolic::ValueExpr > - > type; -}; - -template -struct aseq_reverse_first_type_aux { - typedef Index type; -}; - -template -struct aseq_reverse_first_type_aux::type> { - typedef FixedInt<(SizeType::value-1)*IncrType::value> type; -}; - -template -struct aseq_reverse_first_type { - typedef typename aseq_reverse_first_type_aux::type Aux; - typedef symbolic::AddExpr > type; -}; - -template -struct aseq_reverse_first_type { - typedef symbolic::AddExpr > >, - symbolic::ValueExpr >, - symbolic::ValueExpr<> > type; -}; -#endif - // Helper to cleanup the type of the increment: template struct cleanup_seq_incr { typedef typename cleanup_index_type::type type; }; -} +} // namespace internal //-------------------------------------------------------------------------------- // seq(first,last,incr) and seqN(first,size,incr) @@ -137,21 +86,9 @@ protected: IncrType m_incr; public: - -#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) { return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); } -#else -protected: - typedef typename internal::aseq_negate::type ReverseIncrType; - typedef typename internal::aseq_reverse_first_type::type ReverseFirstType; -public: - ArithmeticSequence - reverse() const { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#endif }; /** \returns an ArithmeticSequence starting at \a first, of length \a size, and increment \a incr @@ -200,7 +137,6 @@ auto seq(FirstType f, LastType l); #else // EIGEN_PARSED_BY_DOXYGEN -#if EIGEN_HAS_CXX11 template auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type::type(f), ( typename internal::cleanup_index_type::type(l) @@ -226,101 +162,11 @@ auto seq(FirstType f, LastType l, IncrType incr) CleanedIncrType(incr)); } -#else // EIGEN_HAS_CXX11 - -template -typename internal::enable_if::value || symbolic::is_symbolic::value), - ArithmeticSequence::type,Index> >::type -seq(FirstType f, LastType l) -{ - return seqN(typename internal::cleanup_index_type::type(f), - Index((typename internal::cleanup_index_type::type(l)-typename internal::cleanup_index_type::type(f)+fix<1>()))); -} - -template -typename internal::enable_if::value, - ArithmeticSequence,symbolic::ValueExpr<> >, - symbolic::ValueExpr > > > >::type -seq(const symbolic::BaseExpr &f, LastType l) -{ - return seqN(f.derived(),(typename internal::cleanup_index_type::type(l)-f.derived()+fix<1>())); -} - -template -typename internal::enable_if::value, - ArithmeticSequence::type, - 
symbolic::AddExpr >, - symbolic::ValueExpr > > > >::type -seq(FirstType f, const symbolic::BaseExpr &l) -{ - return seqN(typename internal::cleanup_index_type::type(f),(l.derived()-typename internal::cleanup_index_type::type(f)+fix<1>())); -} - -template -ArithmeticSequence >,symbolic::ValueExpr > > > -seq(const symbolic::BaseExpr &f, const symbolic::BaseExpr &l) -{ - return seqN(f.derived(),(l.derived()-f.derived()+fix<1>())); -} - - -template -typename internal::enable_if::value || symbolic::is_symbolic::value), - ArithmeticSequence::type,Index,typename internal::cleanup_seq_incr::type> >::type -seq(FirstType f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type::type(f), - Index((typename internal::cleanup_index_type::type(l)-typename internal::cleanup_index_type::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr)), incr); -} - -template -typename internal::enable_if::value, - ArithmeticSequence, - symbolic::ValueExpr<> >, - symbolic::ValueExpr::type> >, - symbolic::ValueExpr::type> >, - typename internal::cleanup_seq_incr::type> >::type -seq(const symbolic::BaseExpr &f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(f.derived(),(typename internal::cleanup_index_type::type(l)-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template -typename internal::enable_if::value, - ArithmeticSequence::type, - symbolic::QuotientExpr >, - symbolic::ValueExpr::type> >, - symbolic::ValueExpr::type> >, - typename internal::cleanup_seq_incr::type> >::type -seq(FirstType f, const symbolic::BaseExpr &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type::type(f), - (l.derived()-typename internal::cleanup_index_type::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template -ArithmeticSequence >, - symbolic::ValueExpr::type> >, - symbolic::ValueExpr::type> >, - typename internal::cleanup_seq_incr::type> -seq(const symbolic::BaseExpr &f, const symbolic::BaseExpr &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(f.derived(),(l.derived()-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} -#endif // EIGEN_HAS_CXX11 #endif // EIGEN_PARSED_BY_DOXYGEN +namespace placeholders { -#if EIGEN_HAS_CXX11 || defined(EIGEN_PARSED_BY_DOXYGEN) /** \cpp11 * \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr. 
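The net effect of this hunk is that last, lastN and lastp1 now live in Eigen::placeholders (with Eigen::indexing re-exporting them, see below). A quick sketch of the resulting call sites, with the qualification per the new namespace and illustrative values:

    #include <Eigen/Dense>

    int main() {
      Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(10, 0.0, 9.0);
      auto tail3 = v(Eigen::placeholders::lastN(3));               // elements 7, 8, 9
      auto evens = v(Eigen::seq(0, Eigen::placeholders::last, 2)); // 0, 2, 4, 6, 8
      auto block = v(Eigen::seqN(2, 4));                           // 2, 3, 4, 5
      return static_cast<int>(tail3.size() + evens.size() + block.size());
    }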
* @@ -329,9 +175,9 @@ seq(const symbolic::BaseExpr &f, const symbolic::BaseExpr auto lastN(SizeType size, IncrType incr) --> decltype(seqN(Eigen::last-(size-fix<1>())*incr, size, incr)) +-> decltype(seqN(Eigen::placeholders::last-(size-fix<1>())*incr, size, incr)) { - return seqN(Eigen::last-(size-fix<1>())*incr, size, incr); + return seqN(Eigen::placeholders::last-(size-fix<1>())*incr, size, incr); } /** \cpp11 @@ -342,18 +188,19 @@ auto lastN(SizeType size, IncrType incr) * \sa lastN(SizeType,IncrType, seqN(FirstType,SizeType), seq(FirstType,LastType) */ template auto lastN(SizeType size) --> decltype(seqN(Eigen::last+fix<1>()-size, size)) +-> decltype(seqN(Eigen::placeholders::last+fix<1>()-size, size)) { - return seqN(Eigen::last+fix<1>()-size, size); + return seqN(Eigen::placeholders::last+fix<1>()-size, size); } -#endif + +} // namespace placeholders namespace internal { // Convert a symbolic span into a usable one (i.e., remove last/end "keywords") template struct make_size_type { - typedef typename internal::conditional::value, Index, T>::type type; + typedef std::conditional_t::value, Index, T> type; }; template @@ -387,25 +234,23 @@ struct get_compile_time_incr > { * \code using namespace Eigen::indexing; \endcode * is equivalent to: * \code - using Eigen::all; + using Eigen::fix; using Eigen::seq; using Eigen::seqN; - using Eigen::lastN; // c++11 only - using Eigen::last; - using Eigen::lastp1; - using Eigen::fix; + using Eigen::placeholders::all; + using Eigen::placeholders::last; + using Eigen::placeholders::lastN; // c++11 only + using Eigen::placeholders::lastp1; \endcode */ namespace indexing { - using Eigen::all; + using Eigen::fix; using Eigen::seq; using Eigen::seqN; - #if EIGEN_HAS_CXX11 - using Eigen::lastN; - #endif - using Eigen::last; - using Eigen::lastp1; - using Eigen::fix; + using Eigen::placeholders::all; + using Eigen::placeholders::last; + using Eigen::placeholders::lastN; + using Eigen::placeholders::lastp1; } } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/Array.h b/libs/eigen/Eigen/src/Core/Array.h index 20c789b..d7a5e7a 100644 --- a/libs/eigen/Eigen/src/Core/Array.h +++ b/libs/eigen/Eigen/src/Core/Array.h @@ -10,14 +10,16 @@ #ifndef EIGEN_ARRAY_H #define EIGEN_ARRAY_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template -struct traits > : traits > +template +struct traits > : traits > { typedef ArrayXpr XprKind; - typedef ArrayBase > XprBase; + typedef ArrayBase > XprBase; }; } @@ -41,16 +43,16 @@ struct traits > : tra * * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy */ -template +template class Array - : public PlainObjectBase > + : public PlainObjectBase > { public: typedef PlainObjectBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(Array) - enum { Options = _Options }; + enum { Options = Options_ }; typedef typename Base::PlainObject PlainObject; protected: @@ -131,7 +133,6 @@ class Array EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array() : Base() { - Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } @@ -142,17 +143,14 @@ class Array Array(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { - Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } #endif -#if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { - Base::_check_template_params(); } EIGEN_DEVICE_FUNC Array& 
operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) @@ -160,9 +158,7 @@ class Array Base::operator=(std::move(other)); return *this; } -#endif - #if EIGEN_HAS_CXX11 /** \copydoc PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args) * * Example: \include Array_variadic_ctor_cxx11.cpp @@ -197,16 +193,15 @@ class Array * * \sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args) */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const std::initializer_list>& list) : Base(list) {} - #endif // end EIGEN_HAS_CXX11 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array( + const std::initializer_list>& list) + : Base(list) {} #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Array(const T& x) { - Base::_check_template_params(); Base::template _init1(x); } @@ -214,7 +209,6 @@ class Array EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) { - Base::_check_template_params(); this->template _init2(val0, val1); } @@ -249,7 +243,6 @@ class Array EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) { - Base::_check_template_params(); EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3) m_storage.data()[0] = val0; m_storage.data()[1] = val1; @@ -261,7 +254,6 @@ class Array EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3) { - Base::_check_template_params(); EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4) m_storage.data()[0] = val0; m_storage.data()[1] = val1; @@ -283,8 +275,8 @@ class Array template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const EigenBase &other, - typename internal::enable_if::value, - PrivateType>::type = PrivateType()) + std::enable_if_t::value, + PrivateType> = PrivateType()) : Base(other.derived()) { } @@ -359,8 +351,6 @@ EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cd) #undef EIGEN_MAKE_ARRAY_TYPEDEFS #undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS -#if EIGEN_HAS_CXX11 - #define EIGEN_MAKE_ARRAY_TYPEDEFS(Size, SizeSuffix) \ /** \ingroup arraytypedefs */ \ /** \brief \cpp11 */ \ @@ -392,8 +382,6 @@ EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(4) #undef EIGEN_MAKE_ARRAY_TYPEDEFS #undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS -#endif // EIGEN_HAS_CXX11 - #define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ using Eigen::Matrix##SizeSuffix##TypeSuffix; \ using Eigen::Vector##SizeSuffix##TypeSuffix; \ diff --git a/libs/eigen/Eigen/src/Core/ArrayBase.h b/libs/eigen/Eigen/src/Core/ArrayBase.h index ea3dd1c..28397e5 100644 --- a/libs/eigen/Eigen/src/Core/ArrayBase.h +++ b/libs/eigen/Eigen/src/Core/ArrayBase.h @@ -10,6 +10,8 @@ #ifndef EIGEN_ARRAYBASE_H #define EIGEN_ARRAYBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template class MatrixWrapper; @@ -21,7 +23,7 @@ template class MatrixWrapper; * * An array is similar to a dense vector or matrix. While matrices are mathematical * objects with well defined linear algebra operators, an array is just a collection - * of scalar values arranged in a one or two dimensionnal fashion. As the main consequence, + * of scalar values arranged in a one or two dimensional fashion. As the main consequence, * all operations applied to an array are performed coefficient wise. 
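A small sketch of the coefficient-wise semantics described above; values are illustrative:

    #include <Eigen/Dense>

    int main() {
      Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 0.0, 3.0);
      Eigen::ArrayXd b = a * a + 1.0;        // product and shift applied per coefficient
      Eigen::ArrayXd c = a.sin();            // std-style math function, coefficient-wise
      Eigen::VectorXd v = (b + c).matrix();  // convert back for linear-algebra operations
      return v.size() == 4 ? 0 : 1;
    }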
Furthermore, * arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient * constructors allowing to easily write generic code working for both scalar values diff --git a/libs/eigen/Eigen/src/Core/ArrayWrapper.h b/libs/eigen/Eigen/src/Core/ArrayWrapper.h index 2e9555b..e65b8fb 100644 --- a/libs/eigen/Eigen/src/Core/ArrayWrapper.h +++ b/libs/eigen/Eigen/src/Core/ArrayWrapper.h @@ -10,6 +10,8 @@ #ifndef EIGEN_ARRAYWRAPPER_H #define EIGEN_ARRAYWRAPPER_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class ArrayWrapper @@ -26,12 +28,12 @@ namespace Eigen { namespace internal { template struct traits > - : public traits::type > + : public traits > { typedef ArrayXpr XprKind; // Let's remove NestByRefBit enum { - Flags0 = traits::type >::Flags, + Flags0 = traits >::Flags, LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; @@ -45,13 +47,13 @@ class ArrayWrapper : public ArrayBase > typedef ArrayBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; - typedef typename internal::conditional< + typedef std::conditional_t< internal::is_lvalue::value, Scalar, const Scalar - >::type ScalarWithConstIfNotLvalue; + > ScalarWithConstIfNotLvalue; typedef typename internal::ref_selector::non_const_type NestedExpressionType; @@ -91,7 +93,7 @@ class ArrayWrapper : public ArrayBase > inline void evalTo(Dest& dst) const { dst = m_expression; } EIGEN_DEVICE_FUNC - const typename internal::remove_all::type& + const internal::remove_all_t& nestedExpression() const { return m_expression; @@ -124,12 +126,12 @@ class ArrayWrapper : public ArrayBase > namespace internal { template struct traits > - : public traits::type > + : public traits > { typedef MatrixXpr XprKind; // Let's remove NestByRefBit enum { - Flags0 = traits::type >::Flags, + Flags0 = traits >::Flags, LvalueBitFlag = is_lvalue::value ? 
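
A pattern repeated across these files: internal::conditional<...>::type and internal::remove_all<...>::type become the alias forms std::conditional_t and internal::remove_all_t. The mechanics, shown with a stand-in trait (my_conditional is illustrative, not an Eigen name):

    #include <type_traits>

    // Pre-C++14 spelling: a nested ::type, needing 'typename' in dependent contexts.
    template <bool C, class A, class B> struct my_conditional { typedef A type; };
    template <class A, class B> struct my_conditional<false, A, B> { typedef B type; };

    // C++14 alias template: no 'typename', no '::type' at the point of use.
    template <bool C, class A, class B>
    using my_conditional_t = typename my_conditional<C, A, B>::type;

    static_assert(std::is_same<my_conditional_t<true, int, float>, int>::value,
                  "same selection, shorter spelling");
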
LvalueBit : 0, Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; @@ -143,13 +145,13 @@ class MatrixWrapper : public MatrixBase > typedef MatrixBase > Base; EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; - typedef typename internal::conditional< - internal::is_lvalue::value, - Scalar, - const Scalar - >::type ScalarWithConstIfNotLvalue; + typedef std::conditional_t< + internal::is_lvalue::value, + Scalar, + const Scalar + > ScalarWithConstIfNotLvalue; typedef typename internal::ref_selector::non_const_type NestedExpressionType; @@ -185,7 +187,7 @@ class MatrixWrapper : public MatrixBase > } EIGEN_DEVICE_FUNC - const typename internal::remove_all::type& + const internal::remove_all_t& nestedExpression() const { return m_expression; diff --git a/libs/eigen/Eigen/src/Core/Assign.h b/libs/eigen/Eigen/src/Core/Assign.h index 655412e..dc716d3 100644 --- a/libs/eigen/Eigen/src/Core/Assign.h +++ b/libs/eigen/Eigen/src/Core/Assign.h @@ -12,6 +12,8 @@ #ifndef EIGEN_ASSIGN_H #define EIGEN_ASSIGN_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template diff --git a/libs/eigen/Eigen/src/Core/AssignEvaluator.h b/libs/eigen/Eigen/src/Core/AssignEvaluator.h index 7d76f0c..8fb1f81 100644 --- a/libs/eigen/Eigen/src/Core/AssignEvaluator.h +++ b/libs/eigen/Eigen/src/Core/AssignEvaluator.h @@ -12,6 +12,8 @@ #ifndef EIGEN_ASSIGN_EVALUATOR_H #define EIGEN_ASSIGN_EVALUATOR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { // This implementation is based on Assign.h @@ -40,7 +42,7 @@ public: DstAlignment = DstEvaluator::Alignment, SrcAlignment = SrcEvaluator::Alignment, DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, - JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) + JointAlignment = plain_enum_min(DstAlignment, SrcAlignment) }; private: @@ -51,8 +53,8 @@ private: InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), - RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize), - RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize), + RestrictedInnerSize = min_size_prefer_fixed(InnerSize, MaxPacketSize), + RestrictedLinearSize = min_size_prefer_fixed(Dst::SizeAtCompileTime, MaxPacketSize), OuterStride = int(outer_stride_at_compile_time::ret), MaxSizeAtCompileTime = Dst::SizeAtCompileTime }; @@ -111,7 +113,7 @@ public: || int(Traversal) == SliceVectorizedTraversal }; - typedef typename conditional::type PacketType; + typedef std::conditional_t PacketType; private: enum { @@ -216,7 +218,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) { } }; template @@ -285,7 +287,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling template struct copy_using_evaluator_innervec_CompleteUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) { } }; template @@ -325,10 +327,9 @@ struct dense_assignment_loop; template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/) + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel& /*kernel*/) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0, + EIGEN_STATIC_ASSERT(int(Kernel::DstEvaluatorType::XprType::SizeAtCompileTime) == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT) } }; @@ -386,7 +387,7 @@ struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing template - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&, Index, Index) {} }; template <> @@ -402,7 +403,7 @@ struct unaligned_dense_assignment_loop Index end) #else template - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel, Index start, Index end) #endif @@ -415,7 +416,7 @@ struct unaligned_dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel) { const Index size = kernel.size(); typedef typename Kernel::Scalar Scalar; @@ -443,7 +444,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; typedef typename Kernel::PacketType PacketType; @@ -469,7 +470,7 @@ struct dense_assignment_loop SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment }; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel) { const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); @@ -511,7 +512,7 @@ struct 
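
EIGEN_PLAIN_ENUM_MIN and EIGEN_SIZE_MIN_PREFER_FIXED give way to the constexpr helpers plain_enum_min and min_size_prefer_fixed. The bodies below reconstruct the intended semantics for illustration and are not Eigen's literal code (Dynamic_ stands in for Eigen::Dynamic):

    // min over enum/int values, without macro double-evaluation pitfalls.
    template <typename A, typename B>
    constexpr int plain_enum_min(A a, B b) {
      return int(a) <= int(b) ? int(a) : int(b);
    }

    constexpr int Dynamic_ = -1;  // Eigen::Dynamic in the real library

    // "Prefer fixed": a known compile-time extent beats Dynamic; otherwise
    // take the ordinary minimum.
    constexpr int min_size_prefer_fixed(int a, int b) {
      return a == Dynamic_ ? b : (b == Dynamic_ ? a : (a < b ? a : b));
    }

    static_assert(min_size_prefer_fixed(Dynamic_, 4) == 4, "fixed size wins");
    static_assert(min_size_prefer_fixed(2, 4) == 2, "plain minimum otherwise");
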
dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel) { const Index size = kernel.size(); for(Index i = 0; i < size; ++i) @@ -522,7 +523,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); @@ -536,7 +537,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel) { typedef typename Kernel::Scalar Scalar; typedef typename Kernel::PacketType PacketType; @@ -584,7 +585,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; typedef typename Kernel::PacketType PacketType; @@ -766,7 +767,7 @@ void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::a } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) { typedef evaluator DstEvaluatorType; typedef evaluator SrcEvaluatorType; @@ -844,8 +845,8 @@ void call_assignment(const Dst& dst, const Src& src) // Deal with "assume-aliasing" template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing::value, void*>::type = 0) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR +void call_assignment(Dst& dst, const Src& src, const Func& func, std::enable_if_t< evaluator_assume_aliasing::value, void*> = 0) { typename plain_matrix_type::type tmp(src); call_assignment_no_alias(dst, tmp, func); @@ -853,7 +854,7 @@ void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::value, void*>::type = 0) +void call_assignment(Dst& dst, const Src& src, const Func& func, std::enable_if_t::value, void*> = 0) { call_assignment_no_alias(dst, src, func); } @@ -861,7 +862,7 @@ void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable // by-pass "assume-aliasing" // When there is no aliasing, we require that 'dst' has been properly resized template class StorageBase, typename Src, typename Func> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment(NoAlias& dst, const Src& src, const Func& func) { call_assignment_no_alias(dst.expression(), src, func); @@ -869,7 +870,7 @@ void call_assignment(NoAlias& dst, const Src& src, const Func& template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) 
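
The call_assignment overload set above now dispatches on std::enable_if_t in a defaulted void* parameter rather than the internal enable_if. Reduced to its skeleton (assumes_aliasing and assign are hypothetical stand-ins for evaluator_assume_aliasing and call_assignment):

    #include <type_traits>
    #include <iostream>

    template <class T> struct assumes_aliasing : std::false_type {};
    struct Prod {};  // stand-in for a product-like expression
    template <> struct assumes_aliasing<Prod> : std::true_type {};

    // Chosen when T may alias its destination: evaluate into a temporary first.
    template <class T>
    void assign(const T&, std::enable_if_t<assumes_aliasing<T>::value, void*> = nullptr) {
      std::cout << "via temporary\n";
    }
    // Chosen otherwise: assign directly.
    template <class T>
    void assign(const T&, std::enable_if_t<!assumes_aliasing<T>::value, void*> = nullptr) {
      std::cout << "direct\n";
    }

    int main() { assign(Prod{}); assign(42); }
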
{ enum { @@ -878,8 +879,8 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) ) && int(Dst::SizeAtCompileTime) != 1 }; - typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; - typedef typename internal::conditional, Dst&>::type ActualDstType; + typedef std::conditional_t, Dst> ActualDstTypeCleaned; + typedef std::conditional_t, Dst&> ActualDstType; ActualDstType actualDst(dst); // TODO check whether this is the right place to perform these checks: @@ -911,14 +912,14 @@ void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(Dst& dst, const Src& src) { call_assignment_no_alias(dst, src, internal::assign_op()); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) { // TODO check whether this is the right place to perform these checks: @@ -929,7 +930,7 @@ void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& Assignment::run(dst, src, func); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); diff --git a/libs/eigen/Eigen/src/Core/Assign_MKL.h b/libs/eigen/Eigen/src/Core/Assign_MKL.h old mode 100755 new mode 100644 index c6140d1..f9b86c8 --- a/libs/eigen/Eigen/src/Core/Assign_MKL.h +++ b/libs/eigen/Eigen/src/Core/Assign_MKL.h @@ -34,6 +34,8 @@ #ifndef EIGEN_ASSIGN_VML_H #define EIGEN_ASSIGN_VML_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -82,7 +84,7 @@ class vml_assign_traits #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ template< typename DstXprType, typename SrcXprNested> \ struct Assignment, SrcXprNested>, assign_op, \ - Dense2Dense, typename enable_if::EnableVml>::type> { \ + Dense2Dense, std::enable_if_t::EnableVml>> { \ typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ resize_if_allowed(dst, src, func); \ @@ -142,7 +144,7 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) template< typename DstXprType, typename SrcXprNested, typename Plain> \ struct Assignment, SrcXprNested, \ const CwiseNullaryOp,Plain> >, assign_op, \ - Dense2Dense, typename enable_if::EnableVml>::type> { \ + Dense2Dense, std::enable_if_t::EnableVml>> { \ typedef CwiseBinaryOp, SrcXprNested, \ const CwiseNullaryOp,Plain> > SrcXprType; \ static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ diff --git a/libs/eigen/Eigen/src/Core/BandMatrix.h b/libs/eigen/Eigen/src/Core/BandMatrix.h index 878c024..dcb0d13 100644 --- a/libs/eigen/Eigen/src/Core/BandMatrix.h +++ b/libs/eigen/Eigen/src/Core/BandMatrix.h @@ -10,6 +10,8 @@ #ifndef EIGEN_BANDMATRIX_H #define EIGEN_BANDMATRIX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -41,7 +43,7 @@ class BandMatrixBase : public EigenBase DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 
1 + Supers + Subs : Dynamic, - SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime) + SizeAtCompileTime = min_size_prefer_dynamic(RowsAtCompileTime,ColsAtCompileTime) }; public: @@ -96,13 +98,13 @@ class BandMatrixBase : public EigenBase DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic) ? Dynamic : (ActualIndex<0 - ? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex) - : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex)) + ? min_size_prefer_dynamic(ColsAtCompileTime, RowsAtCompileTime + ActualIndex) + : min_size_prefer_dynamic(RowsAtCompileTime, ColsAtCompileTime - ActualIndex)) }; typedef Block BuildType; - typedef typename internal::conditional,BuildType >, - BuildType>::type Type; + BuildType> Type; }; /** \returns a vector expression of the \a N -th sub or super diagonal */ @@ -161,12 +163,12 @@ class BandMatrixBase : public EigenBase * * \brief Represents a rectangular matrix with a banded storage * - * \tparam _Scalar Numeric type, i.e. float, double, int - * \tparam _Rows Number of rows, or \b Dynamic - * \tparam _Cols Number of columns, or \b Dynamic - * \tparam _Supers Number of super diagonal - * \tparam _Subs Number of sub diagonal - * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint + * \tparam Scalar_ Numeric type, i.e. float, double, int + * \tparam Rows_ Number of rows, or \b Dynamic + * \tparam Cols_ Number of columns, or \b Dynamic + * \tparam Supers_ Number of super diagonal + * \tparam Subs_ Number of sub diagonal + * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint * The former controls \ref TopicStorageOrders "storage order", and defaults to * column-major. The latter controls whether the matrix represents a selfadjoint * matrix in which case either Supers of Subs have to be null. @@ -174,29 +176,29 @@ class BandMatrixBase : public EigenBase * \sa class TridiagonalMatrix */ -template -struct traits > +template +struct traits > { - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Dense StorageKind; typedef Eigen::Index StorageIndex; enum { CoeffReadCost = NumTraits::ReadCost, - RowsAtCompileTime = _Rows, - ColsAtCompileTime = _Cols, - MaxRowsAtCompileTime = _Rows, - MaxColsAtCompileTime = _Cols, + RowsAtCompileTime = Rows_, + ColsAtCompileTime = Cols_, + MaxRowsAtCompileTime = Rows_, + MaxColsAtCompileTime = Cols_, Flags = LvalueBit, - Supers = _Supers, - Subs = _Subs, - Options = _Options, + Supers = Supers_, + Subs = Subs_, + Options = Options_, DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 
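
The wholesale rename from _Scalar/_Rows/... to Scalar_/Rows_/... in BandMatrix (and the other classes in this patch) is about reserved identifiers: in C++ a leading underscore followed by an uppercase letter is reserved for the implementation, so such names may collide with standard-library internals. The adopted convention:

    // template <typename _Scalar> struct traits;      // reserved name, avoid
    template <typename Scalar_, int Rows_, int Cols_>  // trailing underscore: fine
    struct my_traits {
      typedef Scalar_ Scalar;
      static constexpr int RowsAtCompileTime = Rows_;
      static constexpr int ColsAtCompileTime = Cols_;
    };
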
1 + Supers + Subs : Dynamic }; typedef Matrix CoefficientsType; }; -template -class BandMatrix : public BandMatrixBase > +template +class BandMatrix : public BandMatrixBase > { public: @@ -233,32 +235,32 @@ class BandMatrix : public BandMatrixBase m_subs; }; -template +template class BandMatrixWrapper; -template -struct traits > +template +struct traits > { - typedef typename _CoefficientsType::Scalar Scalar; - typedef typename _CoefficientsType::StorageKind StorageKind; - typedef typename _CoefficientsType::StorageIndex StorageIndex; + typedef typename CoefficientsType_::Scalar Scalar; + typedef typename CoefficientsType_::StorageKind StorageKind; + typedef typename CoefficientsType_::StorageIndex StorageIndex; enum { - CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost, - RowsAtCompileTime = _Rows, - ColsAtCompileTime = _Cols, - MaxRowsAtCompileTime = _Rows, - MaxColsAtCompileTime = _Cols, + CoeffReadCost = internal::traits::CoeffReadCost, + RowsAtCompileTime = Rows_, + ColsAtCompileTime = Cols_, + MaxRowsAtCompileTime = Rows_, + MaxColsAtCompileTime = Cols_, Flags = LvalueBit, - Supers = _Supers, - Subs = _Subs, - Options = _Options, + Supers = Supers_, + Subs = Subs_, + Options = Options_, DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic }; - typedef _CoefficientsType CoefficientsType; + typedef CoefficientsType_ CoefficientsType; }; -template -class BandMatrixWrapper : public BandMatrixBase > +template +class BandMatrixWrapper : public BandMatrixBase > { public: @@ -266,12 +268,12 @@ class BandMatrixWrapper : public BandMatrixBase::CoefficientsType CoefficientsType; typedef typename internal::traits::StorageIndex StorageIndex; - explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=Rows_, Index cols=Cols_, Index supers=Supers_, Index subs=Subs_) : m_coeffs(coeffs), m_rows(rows), m_supers(supers), m_subs(subs) { EIGEN_UNUSED_VARIABLE(cols); - //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows()); + // eigen_assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows()); } /** \returns the number of columns */ @@ -291,9 +293,9 @@ class BandMatrixWrapper : public BandMatrixBase m_rows; - internal::variable_if_dynamic m_supers; - internal::variable_if_dynamic m_subs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; }; /** @@ -330,16 +332,16 @@ class TridiagonalMatrix : public BandMatrix -struct evaluator_traits > - : public evaluator_traits_base > +template +struct evaluator_traits > + : public evaluator_traits_base > { typedef BandShape Shape; }; -template -struct evaluator_traits > - : public evaluator_traits_base > +template +struct evaluator_traits > + : public evaluator_traits_base > { typedef BandShape Shape; }; diff --git a/libs/eigen/Eigen/src/Core/Block.h b/libs/eigen/Eigen/src/Core/Block.h index 3206d66..19c4b68 100644 --- a/libs/eigen/Eigen/src/Core/Block.h +++ b/libs/eigen/Eigen/src/Core/Block.h @@ -11,6 +11,8 @@ #ifndef EIGEN_BLOCK_H #define EIGEN_BLOCK_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -21,7 +23,7 @@ struct traits > : traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; typedef typename ref_selector::type XprTypeNested; - typedef typename remove_reference::type _XprTypeNested; + 
typedef std::remove_reference_t XprTypeNested_; enum{ MatrixRows = traits::RowsAtCompileTime, MatrixCols = traits::ColsAtCompileTime, @@ -110,7 +112,7 @@ template class EIGEN_GENERIC_PUBLIC_INTERFACE(Block) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; /** Column or Row constructor */ @@ -260,19 +262,19 @@ template - inline PacketScalar packet(Index rowId, Index colId) const + EIGEN_DEVICE_FUNC inline PacketScalar packet(Index rowId, Index colId) const { return m_xpr.template packet(rowId + m_startRow.value(), colId + m_startCol.value()); } template - inline void writePacket(Index rowId, Index colId, const PacketScalar& val) + EIGEN_DEVICE_FUNC inline void writePacket(Index rowId, Index colId, const PacketScalar& val) { m_xpr.template writePacket(rowId + m_startRow.value(), colId + m_startCol.value(), val); } template - inline PacketScalar packet(Index index) const + EIGEN_DEVICE_FUNC inline PacketScalar packet(Index index) const { return m_xpr.template packet (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), @@ -280,7 +282,7 @@ template - inline void writePacket(Index index, const PacketScalar& val) + EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& val) { m_xpr.template writePacket (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), @@ -295,7 +297,7 @@ template::type& nestedExpression() const + const internal::remove_all_t& nestedExpression() const { return m_xpr; } @@ -378,7 +380,7 @@ class BlockImpl_dense } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all::type& nestedExpression() const EIGEN_NOEXCEPT + const internal::remove_all_t& nestedExpression() const EIGEN_NOEXCEPT { return m_xpr; } diff --git a/libs/eigen/Eigen/src/Core/BooleanRedux.h b/libs/eigen/Eigen/src/Core/BooleanRedux.h index 852de8b..20e5bd9 100644 --- a/libs/eigen/Eigen/src/Core/BooleanRedux.h +++ b/libs/eigen/Eigen/src/Core/BooleanRedux.h @@ -10,58 +10,62 @@ #ifndef EIGEN_ALLANDANY_H #define EIGEN_ALLANDANY_H -namespace Eigen { +#include "./InternalHeaderCheck.h" + +namespace Eigen { namespace internal { -template +template struct all_unroller { enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + IsRowMajor = (int(Derived::Flags) & int(RowMajor)), + i = (UnrollCount-1) / InnerSize, + j = (UnrollCount-1) % InnerSize }; EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat) { - return all_unroller::run(mat) && mat.coeff(row, col); + return all_unroller::run(mat) && mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i); } }; -template -struct all_unroller +template +struct all_unroller { EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; } }; -template -struct all_unroller +template +struct all_unroller { EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; } }; -template +template struct any_unroller { enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + IsRowMajor = (int(Derived::Flags) & int(RowMajor)), + i = (UnrollCount-1) / InnerSize, + j = (UnrollCount-1) % InnerSize }; - + EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat) { - return any_unroller::run(mat) || mat.coeff(row, col); + return any_unroller::run(mat) || mat.coeff(IsRowMajor ? i : j, IsRowMajor ? 
j : i); } }; -template -struct any_unroller +template +struct any_unroller { EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; } }; -template -struct any_unroller +template +struct any_unroller { EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; } }; @@ -81,16 +85,16 @@ EIGEN_DEVICE_FUNC inline bool DenseBase::all() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits::AddCost)) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits::AddCost)) <= EIGEN_UNROLLING_LIMIT, }; Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller::RowsAtCompileTime>::run(evaluator); + return internal::all_unroller::run(evaluator); else { - for(Index j = 0; j < cols(); ++j) - for(Index i = 0; i < rows(); ++i) - if (!evaluator.coeff(i, j)) return false; + for(Index i = 0; i < derived().outerSize(); ++i) + for(Index j = 0; j < derived().innerSize(); ++j) + if (!evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i)) return false; return true; } } @@ -105,16 +109,16 @@ EIGEN_DEVICE_FUNC inline bool DenseBase::any() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits::AddCost)) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits::AddCost)) <= EIGEN_UNROLLING_LIMIT, }; Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller::RowsAtCompileTime>::run(evaluator); + return internal::any_unroller::run(evaluator); else { - for(Index j = 0; j < cols(); ++j) - for(Index i = 0; i < rows(); ++i) - if (evaluator.coeff(i, j)) return true; + for(Index i = 0; i < derived().outerSize(); ++i) + for(Index j = 0; j < derived().innerSize(); ++j) + if (evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i)) return true; return false; } } @@ -134,7 +138,7 @@ EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase::count() const * \sa allFinite() */ template -inline bool DenseBase::hasNaN() const +EIGEN_DEVICE_FUNC inline bool DenseBase::hasNaN() const { #if EIGEN_COMP_MSVC || (defined __FAST_MATH__) return derived().array().isNaN().any(); @@ -148,7 +152,7 @@ inline bool DenseBase::hasNaN() const * \sa hasNaN() */ template -inline bool DenseBase::allFinite() const +EIGEN_DEVICE_FUNC inline bool DenseBase::allFinite() const { #if EIGEN_COMP_MSVC || (defined __FAST_MATH__) return derived().array().isFinite().all(); @@ -156,7 +160,7 @@ inline bool DenseBase::allFinite() const return !((derived()-derived()).hasNaN()); #endif } - + } // end namespace Eigen #endif // EIGEN_ALLANDANY_H diff --git a/libs/eigen/Eigen/src/Core/CommaInitializer.h b/libs/eigen/Eigen/src/Core/CommaInitializer.h index c0e29c7..7c2eea8 100644 --- a/libs/eigen/Eigen/src/Core/CommaInitializer.h +++ b/libs/eigen/Eigen/src/Core/CommaInitializer.h @@ -11,6 +11,8 @@ #ifndef EIGEN_COMMAINITIALIZER_H #define EIGEN_COMMAINITIALIZER_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class CommaInitializer @@ -45,7 +47,7 @@ struct CommaInitializer { eigen_assert(m_xpr.rows() >= other.rows() && m_xpr.cols() >= other.cols() && "Cannot comma-initialize a 0x0 matrix (operator<<)"); - m_xpr.block(0, 0, other.rows(), other.cols()) = other; + m_xpr.template block(0, 0, other.rows(), other.cols()) = other; } /* Copy/Move constructor which transfers ownership. 
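
The rewritten all()/any() unrollers and runtime loops index coefficients as (outer, inner) and swap the pair for row-major expressions, so traversal now follows the actual storage order instead of always walking column by column. The public API is untouched:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::Matrix<float, 2, 3, Eigen::RowMajor> m;
      m << 1, 2, 3,
           4, 5, 6;
      // Internally traversed row by row now, matching the layout.
      std::cout << std::boolalpha
                << (m.array() > 0).all() << " "    // true
                << (m.array() > 5).any() << "\n";  // true
    }
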
This is crucial in diff --git a/libs/eigen/Eigen/src/Core/ConditionEstimator.h b/libs/eigen/Eigen/src/Core/ConditionEstimator.h index 51a2e5f..694be8b 100644 --- a/libs/eigen/Eigen/src/Core/ConditionEstimator.h +++ b/libs/eigen/Eigen/src/Core/ConditionEstimator.h @@ -10,6 +10,8 @@ #ifndef EIGEN_CONDITIONESTIMATOR_H #define EIGEN_CONDITIONESTIMATOR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -160,12 +162,12 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco { typedef typename Decomposition::RealScalar RealScalar; eigen_assert(dec.rows() == dec.cols()); - if (dec.rows() == 0) return NumTraits::infinity(); - if (matrix_norm == RealScalar(0)) return RealScalar(0); - if (dec.rows() == 1) return RealScalar(1); + if (dec.rows() == 0) return NumTraits::infinity(); + if (numext::is_exactly_zero(matrix_norm)) return RealScalar(0); + if (dec.rows() == 1) return RealScalar(1); const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); - return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0) - : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); + return (numext::is_exactly_zero(inverse_matrix_norm) ? RealScalar(0) + : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); } } // namespace internal diff --git a/libs/eigen/Eigen/src/Core/CoreEvaluators.h b/libs/eigen/Eigen/src/Core/CoreEvaluators.h index 0ff8c8d..1729507 100644 --- a/libs/eigen/Eigen/src/Core/CoreEvaluators.h +++ b/libs/eigen/Eigen/src/Core/CoreEvaluators.h @@ -13,6 +13,8 @@ #ifndef EIGEN_COREEVALUATORS_H #define EIGEN_COREEVALUATORS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -498,7 +500,7 @@ struct evaluator > : evaluator_base > { typedef CwiseNullaryOp XprType; - typedef typename internal::remove_all::type PlainObjectTypeCleaned; + typedef internal::remove_all_t PlainObjectTypeCleaned; enum { CoeffReadCost = internal::functor_traits::Cost, @@ -655,9 +657,9 @@ struct ternary_evaluator, IndexBased ) ), Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), - Alignment = EIGEN_PLAIN_ENUM_MIN( - EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), - evaluator::Alignment) + Alignment = plain_enum_min( + plain_enum_min(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) }; EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) @@ -751,7 +753,7 @@ struct binary_evaluator, IndexBased, IndexBase ) ), Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), - Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) + Alignment = plain_enum_min(evaluator::Alignment, evaluator::Alignment) }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -810,11 +812,11 @@ protected: // -------------------- CwiseUnaryView -------------------- -template -struct unary_evaluator, IndexBased> - : evaluator_base > +template +struct unary_evaluator, IndexBased> + : evaluator_base > { - typedef CwiseUnaryView XprType; + typedef CwiseUnaryView XprType; enum { CoeffReadCost = int(evaluator::CoeffReadCost) + int(functor_traits::Cost), @@ -900,7 +902,8 @@ struct mapbase_evaluator : evaluator_base m_innerStride(map.innerStride()), m_outerStride(map.outerStride()) { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + EIGEN_STATIC_ASSERT(check_implication((evaluator::Flags & PacketAccessBit) != 0, + internal::inner_stride_at_compile_time::ret == 1), 
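
rcond_estimate_helper now spells its exact floating-point comparisons as numext::is_exactly_zero(x) instead of x == RealScalar(0). The helper amounts to something like the sketch below (my reconstruction, not Eigen's literal code; it centralizes a deliberate exact compare so -Wfloat-equal noise stays in one audited place):

    #include <iostream>

    template <typename T>
    inline bool is_exactly_zero(const T& x) { return x == T(0); }

    int main() {
      std::cout << std::boolalpha
                << is_exactly_zero(0.0) << " "      // true
                << is_exactly_zero(1e-300) << "\n"; // false: no tolerance involved
    }
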
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -1072,7 +1075,7 @@ struct evaluator > Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (OuterStrideAtCompileTime!=0) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, - Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) + Alignment = plain_enum_min(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1222,8 +1225,8 @@ struct block_evaluator(block) { - // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime - eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + eigen_internal_assert((internal::is_constant_evaluated() || (internal::UIntPtr(block.data()) % plain_enum_max(1,evaluator::Alignment)) == 0) \ + && "data is not aligned"); } }; @@ -1239,12 +1242,12 @@ struct evaluator > typedef Select XprType; enum { CoeffReadCost = evaluator::CoeffReadCost - + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, - evaluator::CoeffReadCost), + + plain_enum_max(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, - Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) + Alignment = plain_enum_min(evaluator::Alignment, evaluator::Alignment) }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1295,7 +1298,7 @@ struct unary_evaluator > Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor }; typedef typename internal::nested_eval::type ArgTypeNested; - typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef internal::remove_all_t ArgTypeNestedCleaned; enum { CoeffReadCost = evaluator::CoeffReadCost, @@ -1379,7 +1382,7 @@ template struct evaluator_wrapper_base : evaluator_base { - typedef typename remove_all::type ArgType; + typedef remove_all_t ArgType; enum { CoeffReadCost = evaluator::CoeffReadCost, Flags = evaluator::Flags, @@ -1720,14 +1723,14 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.arg()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); } // This constructor is used when nesting an EvalTo evaluator in another evaluator EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) : m_result(arg) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); } protected: diff --git a/libs/eigen/Eigen/src/Core/CoreIterators.h b/libs/eigen/Eigen/src/Core/CoreIterators.h index b967196..f74568a 100644 --- a/libs/eigen/Eigen/src/Core/CoreIterators.h +++ b/libs/eigen/Eigen/src/Core/CoreIterators.h @@ -10,6 +10,8 @@ #ifndef EIGEN_COREITERATORS_H #define EIGEN_COREITERATORS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core diff --git a/libs/eigen/Eigen/src/Core/CwiseBinaryOp.h b/libs/eigen/Eigen/src/Core/CwiseBinaryOp.h index 2202b1c..21a061a 100644 --- a/libs/eigen/Eigen/src/Core/CwiseBinaryOp.h +++ b/libs/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -11,6 +11,8 @@ #ifndef EIGEN_CWISE_BINARY_OP_H #define EIGEN_CWISE_BINARY_OP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -19,7 +21,7 @@ struct traits > { // we must 
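
In the EvalTo evaluator, re-initializing the base with placement new (::new (static_cast<void*>(this)) Base(...)) becomes internal::construct_at(this, ...), Eigen's portable counterpart of C++20 std::construct_at, which is also usable in constant evaluation. The general pattern, shown with the standard facility (assumes a C++20 toolchain):

    #include <memory>
    #include <string>

    int main() {
      alignas(std::string) unsigned char buf[sizeof(std::string)];
      // construct_at: the constexpr-friendly replacement for placement new.
      std::string* s = std::construct_at(reinterpret_cast<std::string*>(buf), "hi");
      std::destroy_at(s);  // matching manual destruction
    }
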
not inherit from traits since it has // the potential to cause problems with MSVC - typedef typename remove_all::type Ancestor; + typedef remove_all_t Ancestor; typedef typename traits::XprKind XprKind; enum { RowsAtCompileTime = traits::RowsAtCompileTime, @@ -43,10 +45,10 @@ struct traits > typename traits::StorageIndex>::type StorageIndex; typedef typename Lhs::Nested LhsNested; typedef typename Rhs::Nested RhsNested; - typedef typename remove_reference::type _LhsNested; - typedef typename remove_reference::type _RhsNested; + typedef std::remove_reference_t LhsNested_; + typedef std::remove_reference_t RhsNested_; enum { - Flags = cwise_promote_storage_order::StorageKind,typename traits::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value + Flags = cwise_promote_storage_order::StorageKind,typename traits::StorageKind,LhsNested_::Flags & RowMajorBit,RhsNested_::Flags & RowMajorBit>::value }; }; } // end namespace internal @@ -84,9 +86,9 @@ class CwiseBinaryOp : { public: - typedef typename internal::remove_all::type Functor; - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; + typedef internal::remove_all_t Functor; + typedef internal::remove_all_t Lhs; + typedef internal::remove_all_t Rhs; typedef typename CwiseBinaryOpImpl< BinaryOp, LhsType, RhsType, @@ -95,12 +97,15 @@ class CwiseBinaryOp : BinaryOp>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) + EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs) + typedef typename internal::ref_selector::type LhsNested; typedef typename internal::ref_selector::type RhsNested; - typedef typename internal::remove_reference::type _LhsNested; - typedef typename internal::remove_reference::type _RhsNested; + typedef std::remove_reference_t LhsNested_; + typedef std::remove_reference_t RhsNested_; -#if EIGEN_COMP_MSVC && EIGEN_HAS_CXX11 +#if EIGEN_COMP_MSVC //Required for Visual Studio or the Copy constructor will probably not get inlined! EIGEN_STRONG_INLINE CwiseBinaryOp(const CwiseBinaryOp&) = default; @@ -110,29 +115,26 @@ class CwiseBinaryOp : CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp()) : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) { - EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar); - // require the sizes to match - EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs) eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { // return the fixed size type if available to enable compile time optimizations - return internal::traits::type>::RowsAtCompileTime==Dynamic ? m_rhs.rows() : m_lhs.rows(); + return internal::traits>::RowsAtCompileTime==Dynamic ? m_rhs.rows() : m_lhs.rows(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { // return the fixed size type if available to enable compile time optimizations - return internal::traits::type>::ColsAtCompileTime==Dynamic ? m_rhs.cols() : m_lhs.cols(); + return internal::traits>::ColsAtCompileTime==Dynamic ? 
m_rhs.cols() : m_lhs.cols(); } /** \returns the left hand side nested expression */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const _LhsNested& lhs() const { return m_lhs; } + const LhsNested_& lhs() const { return m_lhs; } /** \returns the right hand side nested expression */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const _RhsNested& rhs() const { return m_rhs; } + const RhsNested_& rhs() const { return m_rhs; } /** \returns the functor representing the binary operation */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const BinaryOp& functor() const { return m_functor; } diff --git a/libs/eigen/Eigen/src/Core/CwiseNullaryOp.h b/libs/eigen/Eigen/src/Core/CwiseNullaryOp.h index 289ec51..b33c052 100644 --- a/libs/eigen/Eigen/src/Core/CwiseNullaryOp.h +++ b/libs/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -10,6 +10,8 @@ #ifndef EIGEN_CWISE_NULLARY_OP_H #define EIGEN_CWISE_NULLARY_OP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -304,6 +306,20 @@ DenseBase::LinSpaced(const Scalar& low, const Scalar& high) return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessEqualSpacedReturnType +DenseBase::EqualSpaced(Index size, const Scalar& low, const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::equalspaced_op(low, step)); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessEqualSpacedReturnType +DenseBase::EqualSpaced(const Scalar& low, const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::equalspaced_op(low, step)); +} + /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ template EIGEN_DEVICE_FUNC bool DenseBase::isApproxToConstant @@ -453,6 +469,19 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced( return setLinSpaced(size(), low, high); } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setEqualSpaced(Index newSize, const Scalar& low, + const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(newSize, internal::equalspaced_op(low, step)); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setEqualSpaced(const Scalar& low, + const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return setEqualSpaced(size(), low, step); +} + // zero: /** \returns an expression of a zero matrix. 
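
EqualSpaced / setEqualSpaced are new: where LinSpaced derives the increment from the endpoints, EqualSpaced takes the start value and the step directly. Usage per the declarations in the hunk above:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      // 0, 3, 6, 9, 12: five coefficients starting at 0 with step 3.
      Eigen::VectorXd v = Eigen::VectorXd::EqualSpaced(5, 0.0, 3.0);
      Eigen::VectorXd w(5);
      w.setEqualSpaced(0.0, 3.0);  // in-place variant, size taken from w
      std::cout << v.transpose() << "\n" << w.transpose() << "\n";
    }
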
diff --git a/libs/eigen/Eigen/src/Core/CwiseTernaryOp.h b/libs/eigen/Eigen/src/Core/CwiseTernaryOp.h index 9f3576f..8d24a48 100644 --- a/libs/eigen/Eigen/src/Core/CwiseTernaryOp.h +++ b/libs/eigen/Eigen/src/Core/CwiseTernaryOp.h @@ -12,6 +12,8 @@ #ifndef EIGEN_CWISE_TERNARY_OP_H #define EIGEN_CWISE_TERNARY_OP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -19,7 +21,7 @@ template struct traits > { // we must not inherit from traits since it has // the potential to cause problems with MSVC - typedef typename remove_all::type Ancestor; + typedef remove_all_t Ancestor; typedef typename traits::XprKind XprKind; enum { RowsAtCompileTime = traits::RowsAtCompileTime, @@ -41,10 +43,10 @@ struct traits > { typedef typename Arg1::Nested Arg1Nested; typedef typename Arg2::Nested Arg2Nested; typedef typename Arg3::Nested Arg3Nested; - typedef typename remove_reference::type _Arg1Nested; - typedef typename remove_reference::type _Arg2Nested; - typedef typename remove_reference::type _Arg3Nested; - enum { Flags = _Arg1Nested::Flags & RowMajorBit }; + typedef std::remove_reference_t Arg1Nested_; + typedef std::remove_reference_t Arg2Nested_; + typedef std::remove_reference_t Arg3Nested_; + enum { Flags = Arg1Nested_::Flags & RowMajorBit }; }; } // end namespace internal @@ -87,9 +89,23 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl< internal::no_assignment_operator { public: - typedef typename internal::remove_all::type Arg1; - typedef typename internal::remove_all::type Arg2; - typedef typename internal::remove_all::type Arg3; + typedef internal::remove_all_t Arg1; + typedef internal::remove_all_t Arg2; + typedef internal::remove_all_t Arg3; + + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The index types should match + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) typedef typename CwiseTernaryOpImpl< TernaryOp, Arg1Type, Arg2Type, Arg3Type, @@ -99,29 +115,15 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl< typedef typename internal::ref_selector::type Arg1Nested; typedef typename internal::ref_selector::type Arg2Nested; typedef typename internal::ref_selector::type Arg3Nested; - typedef typename internal::remove_reference::type _Arg1Nested; - typedef typename internal::remove_reference::type _Arg2Nested; - typedef typename internal::remove_reference::type _Arg3Nested; + typedef std::remove_reference_t Arg1Nested_; + typedef std::remove_reference_t Arg2Nested_; + typedef std::remove_reference_t Arg3Nested_; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2, const Arg3& a3, const TernaryOp& func = TernaryOp()) : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) { - // require the sizes to match - EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) - EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) - - // The index types should match - EIGEN_STATIC_ASSERT((internal::is_same< - typename internal::traits::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT((internal::is_same< - typename internal::traits::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - 
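
Note where the size and storage-kind static asserts move in this file: out of the CwiseTernaryOp constructor body and up to class scope (CwiseBinaryOp got the same treatment earlier in this patch). At class scope they fire on any instantiation of the type, not only when the constructor happens to be compiled. The difference in miniature:

    #include <type_traits>

    template <class A, class B>
    struct checked_in_ctor {
      checked_in_ctor() {  // fires only if this constructor is instantiated
        static_assert(std::is_same<A, B>::value, "mismatched operands");
      }
    };

    template <class A, class B>
    struct checked_at_class_scope {
      // fires whenever the class template itself is instantiated
      static_assert(std::is_same<A, B>::value, "mismatched operands");
    };

    int main() { checked_at_class_scope<int, int> ok; (void)ok; }
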
eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && a1.rows() == a3.rows() && a1.cols() == a3.cols()); } @@ -130,14 +132,14 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl< EIGEN_STRONG_INLINE Index rows() const { // return the fixed size type if available to enable compile time // optimizations - if (internal::traits::type>:: + if (internal::traits>:: RowsAtCompileTime == Dynamic && - internal::traits::type>:: + internal::traits>:: RowsAtCompileTime == Dynamic) return m_arg3.rows(); - else if (internal::traits::type>:: + else if (internal::traits>:: RowsAtCompileTime == Dynamic && - internal::traits::type>:: + internal::traits>:: RowsAtCompileTime == Dynamic) return m_arg2.rows(); else @@ -147,14 +149,14 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl< EIGEN_STRONG_INLINE Index cols() const { // return the fixed size type if available to enable compile time // optimizations - if (internal::traits::type>:: + if (internal::traits>:: ColsAtCompileTime == Dynamic && - internal::traits::type>:: + internal::traits>:: ColsAtCompileTime == Dynamic) return m_arg3.cols(); - else if (internal::traits::type>:: + else if (internal::traits>:: ColsAtCompileTime == Dynamic && - internal::traits::type>:: + internal::traits>:: ColsAtCompileTime == Dynamic) return m_arg2.cols(); else @@ -163,13 +165,13 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl< /** \returns the first argument nested expression */ EIGEN_DEVICE_FUNC - const _Arg1Nested& arg1() const { return m_arg1; } + const Arg1Nested_& arg1() const { return m_arg1; } /** \returns the first argument nested expression */ EIGEN_DEVICE_FUNC - const _Arg2Nested& arg2() const { return m_arg2; } + const Arg2Nested_& arg2() const { return m_arg2; } /** \returns the third argument nested expression */ EIGEN_DEVICE_FUNC - const _Arg3Nested& arg3() const { return m_arg3; } + const Arg3Nested_& arg3() const { return m_arg3; } /** \returns the functor representing the ternary operation */ EIGEN_DEVICE_FUNC const TernaryOp& functor() const { return m_functor; } diff --git a/libs/eigen/Eigen/src/Core/CwiseUnaryOp.h b/libs/eigen/Eigen/src/Core/CwiseUnaryOp.h index e68c4f7..ff7d0b9 100644 --- a/libs/eigen/Eigen/src/Core/CwiseUnaryOp.h +++ b/libs/eigen/Eigen/src/Core/CwiseUnaryOp.h @@ -11,6 +11,8 @@ #ifndef EIGEN_CWISE_UNARY_OP_H #define EIGEN_CWISE_UNARY_OP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -22,9 +24,9 @@ struct traits > UnaryOp(const typename XprType::Scalar&) >::type Scalar; typedef typename XprType::Nested XprTypeNested; - typedef typename remove_reference::type _XprTypeNested; + typedef std::remove_reference_t XprTypeNested_; enum { - Flags = _XprTypeNested::Flags & RowMajorBit + Flags = XprTypeNested_::Flags & RowMajorBit }; }; } @@ -59,7 +61,7 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) typedef typename internal::ref_selector::type XprTypeNested; - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) @@ -76,12 +78,12 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl::type& + const internal::remove_all_t& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::remove_all::type& + internal::remove_all_t& nestedExpression() { return m_xpr; } protected: diff --git 
a/libs/eigen/Eigen/src/Core/CwiseUnaryView.h b/libs/eigen/Eigen/src/Core/CwiseUnaryView.h index a06d762..b4539a6 100644 --- a/libs/eigen/Eigen/src/Core/CwiseUnaryView.h +++ b/libs/eigen/Eigen/src/Core/CwiseUnaryView.h @@ -10,35 +10,42 @@ #ifndef EIGEN_CWISE_UNARY_VIEW_H #define EIGEN_CWISE_UNARY_VIEW_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template -struct traits > +template +struct traits > : traits { typedef typename result_of< ViewOp(const typename traits::Scalar&) >::type Scalar; typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename remove_all::type _MatrixTypeNested; + typedef remove_all_t MatrixTypeNested_; enum { FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions + Flags = traits::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions MatrixTypeInnerStride = inner_stride_at_compile_time::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: // "error: no integral type can represent all of the enumerator values - InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic - ? int(Dynamic) - : int(MatrixTypeInnerStride) * int(sizeof(typename traits::Scalar) / sizeof(Scalar)), - OuterStrideAtCompileTime = outer_stride_at_compile_time::ret == Dynamic - ? int(Dynamic) - : outer_stride_at_compile_time::ret * int(sizeof(typename traits::Scalar) / sizeof(Scalar)) + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? (MatrixTypeInnerStride == Dynamic + ? int(Dynamic) + : int(MatrixTypeInnerStride) * int(sizeof(typename traits::Scalar) / sizeof(Scalar))) + : int(StrideType::InnerStrideAtCompileTime), + + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? (outer_stride_at_compile_time::ret == Dynamic + ? 
int(Dynamic) + : outer_stride_at_compile_time::ret * int(sizeof(typename traits::Scalar) / sizeof(Scalar))) + : int(StrideType::OuterStrideAtCompileTime) }; }; } -template +template class CwiseUnaryViewImpl; /** \class CwiseUnaryView @@ -54,15 +61,15 @@ class CwiseUnaryViewImpl; * * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp */ -template -class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind> +template +class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind> { public: - typedef typename CwiseUnaryViewImpl::StorageKind>::Base Base; + typedef typename CwiseUnaryViewImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) typedef typename internal::ref_selector::non_const_type MatrixTypeNested; - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; explicit EIGEN_DEVICE_FUNC inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} @@ -78,11 +85,11 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::type& + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { return m_matrix; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC typename internal::remove_reference::type& + EIGEN_DEVICE_FUNC std::remove_reference_t& nestedExpression() { return m_matrix; } protected: @@ -91,22 +98,22 @@ class CwiseUnaryView : public CwiseUnaryViewImpl +template class CwiseUnaryViewImpl - : public internal::generic_xpr_base >::type + : public internal::generic_xpr_base >::type { public: - typedef typename internal::generic_xpr_base >::type Base; + typedef typename internal::generic_xpr_base >::type Base; }; -template -class CwiseUnaryViewImpl - : public internal::dense_xpr_base< CwiseUnaryView >::type +template +class CwiseUnaryViewImpl + : public internal::dense_xpr_base< CwiseUnaryView >::type { public: - typedef CwiseUnaryView Derived; - typedef typename internal::dense_xpr_base< CwiseUnaryView >::type Base; + typedef CwiseUnaryView Derived; + typedef typename internal::dense_xpr_base< CwiseUnaryView >::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) @@ -116,12 +123,16 @@ class CwiseUnaryViewImpl EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const { - return derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + return StrideType::InnerStrideAtCompileTime != 0 + ? int(StrideType::InnerStrideAtCompileTime) + : derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const { - return derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + return StrideType::OuterStrideAtCompileTime != 0 + ? 
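
CwiseUnaryView gains a StrideType parameter: a non-zero compile-time stride overrides the default computation above, which scales the nested expression's strides by the sizeof ratio of the two scalar types. That default is what makes views like .real() on a complex matrix work; a quick check of it:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXcd m = Eigen::MatrixXcd::Random(3, 3);
      auto r = m.real();  // CwiseUnaryView over the real parts
      // Each complex<double> holds two doubles, hence inner stride 2.
      std::cout << r.innerStride() << "\n";  // 2
      r(0, 0) = 42.0;                        // the view is writable
      std::cout << m(0, 0) << "\n";          // real part now 42
    }
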
int(StrideType::OuterStrideAtCompileTime) + : derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } protected: EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl) diff --git a/libs/eigen/Eigen/src/Core/DenseBase.h b/libs/eigen/Eigen/src/Core/DenseBase.h index 9b16db6..bcfd0f6 100644 --- a/libs/eigen/Eigen/src/Core/DenseBase.h +++ b/libs/eigen/Eigen/src/Core/DenseBase.h @@ -11,17 +11,12 @@ #ifndef EIGEN_DENSEBASE_H #define EIGEN_DENSEBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { -namespace internal { - // The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type. -// This dummy function simply aims at checking that at compile time. -static inline void check_DenseIndex_is_signed() { - EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE) -} - -} // end namespace internal +EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE) /** \class DenseBase * \ingroup Core_Module @@ -110,8 +105,7 @@ template class DenseBase * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ - SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, - internal::traits::ColsAtCompileTime>::ret), + SizeAtCompileTime = (internal::size_of_xpr_at_compile_time::ret), /**< This is equal to the number of coefficients, i.e. the number of * rows times the number of columns, or to \a Dynamic if this is not * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ @@ -138,8 +132,8 @@ template class DenseBase * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime */ - MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime>::ret), + MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime), /**< This value is equal to the maximum possible number of coefficients that this expression * might have. If this expression might have an arbitrarily high number of coefficients, * this value is set to \a Dynamic. @@ -206,13 +200,8 @@ template class DenseBase * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed * that the return type of eval() is either PlainObject or const PlainObject&. */ - typedef typename internal::conditional::XprKind,MatrixXpr >::value, - PlainMatrix, PlainArray>::type PlainObject; - - /** \returns the number of nonzero coefficients which is in practice the number - * of stored coefficients. */ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - inline Index nonZeros() const { return size(); } + typedef std::conditional_t::XprKind,MatrixXpr >::value, + PlainMatrix, PlainArray> PlainObject; /** \returns the outer size. * @@ -269,6 +258,8 @@ template class DenseBase EIGEN_DEPRECATED typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; + /** \internal Represents a vector with equally spaced coefficients that allows random access. 
*/ + typedef CwiseNullaryOp, PlainObject> RandomAccessEqualSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; @@ -324,9 +315,9 @@ template class DenseBase typedef Transpose TransposeReturnType; EIGEN_DEVICE_FUNC TransposeReturnType transpose(); - typedef typename internal::add_const >::type ConstTransposeReturnType; + typedef Transpose ConstTransposeReturnType; EIGEN_DEVICE_FUNC - ConstTransposeReturnType transpose() const; + const ConstTransposeReturnType transpose() const; EIGEN_DEVICE_FUNC void transposeInPlace(); @@ -347,6 +338,11 @@ template class DenseBase EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType + EqualSpaced(Index size, const Scalar& low, const Scalar& step); + EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType + EqualSpaced(const Scalar& low, const Scalar& step); + template EIGEN_DEVICE_FUNC static const CwiseNullaryOp NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); @@ -368,6 +364,8 @@ template class DenseBase EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value); EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setEqualSpaced(Index size, const Scalar& low, const Scalar& step); + EIGEN_DEVICE_FUNC Derived& setEqualSpaced(const Scalar& low, const Scalar& step); EIGEN_DEVICE_FUNC Derived& setZero(); EIGEN_DEVICE_FUNC Derived& setOnes(); EIGEN_DEVICE_FUNC Derived& setRandom(); @@ -387,15 +385,15 @@ template class DenseBase EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; - inline bool hasNaN() const; - inline bool allFinite() const; + EIGEN_DEVICE_FUNC inline bool hasNaN() const; + EIGEN_DEVICE_FUNC inline bool allFinite() const; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other); EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other); - typedef typename internal::add_const_on_value_type::type>::type EvalReturnType; + typedef internal::add_const_on_value_type_t::type> EvalReturnType; /** \returns the matrix or vector obtained by evaluating this expression. 
* * Notice that in the case of a plain matrix or vector (not an expression) this function just returns @@ -439,9 +437,9 @@ template class DenseBase EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); template EIGEN_DEVICE_FUNC - inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; + inline const std::conditional_t,Derived&> forceAlignedAccessIf() const; template EIGEN_DEVICE_FUNC - inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + inline std::conditional_t,Derived&> forceAlignedAccessIf(); EIGEN_DEVICE_FUNC Scalar sum() const; EIGEN_DEVICE_FUNC Scalar mean() const; @@ -621,27 +619,21 @@ template class DenseBase /** This is the const version of iterator (aka read-only) */ typedef random_access_iterator_type const_iterator; #else - typedef typename internal::conditional< (Flags&DirectAccessBit)==DirectAccessBit, - internal::pointer_based_stl_iterator, - internal::generic_randaccess_stl_iterator - >::type iterator_type; + typedef std::conditional_t< (Flags&DirectAccessBit)==DirectAccessBit, + internal::pointer_based_stl_iterator, + internal::generic_randaccess_stl_iterator + > iterator_type; - typedef typename internal::conditional< (Flags&DirectAccessBit)==DirectAccessBit, - internal::pointer_based_stl_iterator, - internal::generic_randaccess_stl_iterator - >::type const_iterator_type; + typedef std::conditional_t< (Flags&DirectAccessBit)==DirectAccessBit, + internal::pointer_based_stl_iterator, + internal::generic_randaccess_stl_iterator + > const_iterator_type; // Stl-style iterators are supported only for vectors. - typedef typename internal::conditional< IsVectorAtCompileTime, - iterator_type, - void - >::type iterator; + typedef std::conditional_t iterator; - typedef typename internal::conditional< IsVectorAtCompileTime, - const_iterator_type, - void - >::type const_iterator; + typedef std::conditional_t const_iterator; #endif inline iterator begin(); @@ -678,14 +670,13 @@ template class DenseBase protected: EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase) /** Default constructor. Do nothing. */ - EIGEN_DEVICE_FUNC DenseBase() - { + EIGEN_DEVICE_FUNC constexpr DenseBase() { /* Just checks for self-consistency of the flags. 
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down */ #ifdef EIGEN_INTERNAL_DEBUGGING - EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor)) - && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))), + EIGEN_STATIC_ASSERT((internal::check_implication(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor)) + && internal::check_implication(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))), INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION) #endif } diff --git a/libs/eigen/Eigen/src/Core/DenseCoeffsBase.h b/libs/eigen/Eigen/src/Core/DenseCoeffsBase.h index 37fcdb5..7f0bcf4 100644 --- a/libs/eigen/Eigen/src/Core/DenseCoeffsBase.h +++ b/libs/eigen/Eigen/src/Core/DenseCoeffsBase.h @@ -10,12 +10,14 @@ #ifndef EIGEN_DENSECOEFFSBASE_H #define EIGEN_DENSECOEFFSBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { template struct add_const_on_value_type_if_arithmetic { - typedef typename conditional::value, T, typename add_const_on_value_type::type>::type type; + typedef std::conditional_t::value, T, add_const_on_value_type_t> type; }; } @@ -43,13 +45,13 @@ class DenseCoeffsBase : public EigenBase // - This is the return type of the coeff() method. // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value). - // - The is_artihmetic check is required since "const int", "const double", etc. will cause warnings on some systems + // - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems // while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is // not possible, since the underlying expressions might not offer a valid address the reference could be referring to. 
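// ---------------------------------------------------------------------------
// Illustration (not part of the diff): the hunks above replace Eigen's
// internal::conditional<C,A,B>::type with the C++14 alias std::conditional_t,
// and the comment above explains how CoeffReturnType is chosen. A minimal,
// self-contained sketch of that selection logic, using the hypothetical alias
// name CoeffReturnLike purely for illustration:
#include <type_traits>

template <bool IsLvalue, typename Scalar>
using CoeffReturnLike =
    std::conditional_t<IsLvalue,
                       const Scalar&,  // lvalue expressions can hand out a const reference
                       std::conditional_t<std::is_arithmetic<Scalar>::value,
                                          Scalar,          // cheap arithmetic types go by value
                                          const Scalar>>;  // other types keep a top-level const

static_assert(std::is_same<CoeffReturnLike<true, float>, const float&>::value, "");
static_assert(std::is_same<CoeffReturnLike<false, float>, float>::value, "");
// ---------------------------------------------------------------------------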
- typedef typename internal::conditional::Flags&LvalueBit), - const Scalar&, - typename internal::conditional::value, Scalar, const Scalar>::type - >::type CoeffReturnType; + typedef std::conditional_t::Flags&LvalueBit), + const Scalar&, + std::conditional_t::value, Scalar, const Scalar> + > CoeffReturnType; typedef typename internal::add_const_on_value_type_if_arithmetic< typename internal::packet_traits::type diff --git a/libs/eigen/Eigen/src/Core/DenseStorage.h b/libs/eigen/Eigen/src/Core/DenseStorage.h index 08ef6c5..cf588bd 100644 --- a/libs/eigen/Eigen/src/Core/DenseStorage.h +++ b/libs/eigen/Eigen/src/Core/DenseStorage.h @@ -18,20 +18,20 @@ #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) #endif +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { struct constructor_without_unaligned_array_assert {}; -template -EIGEN_DEVICE_FUNC -void check_static_allocation_size() -{ - // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit - #if EIGEN_STACK_ALLOCATION_LIMIT +template +EIGEN_DEVICE_FUNC constexpr void check_static_allocation_size() { +// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit +#if EIGEN_STACK_ALLOCATION_LIMIT EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); - #endif +#endif } /** \internal @@ -45,35 +45,30 @@ struct plain_array { T array[Size]; - EIGEN_DEVICE_FUNC - plain_array() - { - check_static_allocation_size(); - } + EIGEN_DEVICE_FUNC constexpr plain_array() { check_static_allocation_size(); } - EIGEN_DEVICE_FUNC - plain_array(constructor_without_unaligned_array_assert) - { - check_static_allocation_size(); + EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { + check_static_allocation_size(); } }; #if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) -#elif EIGEN_GNUC_AT_LEAST(4,7) +#elif EIGEN_COMP_GNUC // GCC 4.7 is too aggressive in its optimizations and remove the alignment test based on the fact the array is declared to be aligned. // See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900 // Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined: template EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ + eigen_assert((internal::is_constant_evaluated() \ + || (internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0) \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! ****"); #else #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \ + eigen_assert((internal::is_constant_evaluated() || (internal::UIntPtr(array) & (sizemask)) == 0) \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! 
****"); @@ -84,17 +79,13 @@ struct plain_array { EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; - EIGEN_DEVICE_FUNC - plain_array() - { + EIGEN_DEVICE_FUNC constexpr plain_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); check_static_allocation_size(); } - EIGEN_DEVICE_FUNC - plain_array(constructor_without_unaligned_array_assert) - { - check_static_allocation_size(); + EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { + check_static_allocation_size(); } }; @@ -103,17 +94,13 @@ struct plain_array { EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; - EIGEN_DEVICE_FUNC - plain_array() - { + EIGEN_DEVICE_FUNC constexpr plain_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); check_static_allocation_size(); } - EIGEN_DEVICE_FUNC - plain_array(constructor_without_unaligned_array_assert) - { - check_static_allocation_size(); + EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { + check_static_allocation_size(); } }; @@ -122,17 +109,13 @@ struct plain_array { EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; - EIGEN_DEVICE_FUNC - plain_array() - { + EIGEN_DEVICE_FUNC constexpr plain_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); check_static_allocation_size(); } - EIGEN_DEVICE_FUNC - plain_array(constructor_without_unaligned_array_assert) - { - check_static_allocation_size(); + EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { + check_static_allocation_size(); } }; @@ -141,17 +124,13 @@ struct plain_array { EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; - EIGEN_DEVICE_FUNC - plain_array() - { + EIGEN_DEVICE_FUNC constexpr plain_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); check_static_allocation_size(); } - EIGEN_DEVICE_FUNC - plain_array(constructor_without_unaligned_array_assert) - { - check_static_allocation_size(); + EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { + check_static_allocation_size(); } }; @@ -159,8 +138,8 @@ template struct plain_array { T array[1]; - EIGEN_DEVICE_FUNC plain_array() {} - EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC constexpr plain_array() {} + EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {} }; struct plain_array_helper { @@ -201,57 +180,32 @@ struct plain_array_helper { * * \sa Matrix */ -template class DenseStorage; +template class DenseStorage; // purely fixed-size matrix -template class DenseStorage +template class DenseStorage { - internal::plain_array m_data; + internal::plain_array m_data; public: - EIGEN_DEVICE_FUNC DenseStorage() { + constexpr EIGEN_DEVICE_FUNC DenseStorage() { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) } - EIGEN_DEVICE_FUNC - explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} -#if !EIGEN_HAS_CXX11 || defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN) - EIGEN_DEVICE_FUNC +#if defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN) + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) : m_data(other.m_data) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) } #else - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default; #endif -#if !EIGEN_HAS_CXX11 - EIGEN_DEVICE_FUNC - DenseStorage& operator=(const DenseStorage& other) - { - if 
(this != &other) m_data = other.m_data; - return *this; - } -#else - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) = default; -#endif -#if EIGEN_HAS_RVALUE_REFERENCES -#if !EIGEN_HAS_CXX11 - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT - : m_data(std::move(other.m_data)) - { - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT - { - if (this != &other) - m_data = std::move(other.m_data); - return *this; - } -#else - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&&) = default; - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&&) = default; -#endif -#endif - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) { + EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); + eigen_internal_assert(size == rows * cols && rows == Rows_ && cols == Cols_); EIGEN_UNUSED_VARIABLE(size); EIGEN_UNUSED_VARIABLE(rows); EIGEN_UNUSED_VARIABLE(cols); @@ -259,57 +213,148 @@ template class DenseSt EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_data, other.m_data); } - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;} - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;} - EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} - EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} - EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } - EIGEN_DEVICE_FUNC T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } + EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT { return Cols_; } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index) {} + EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index) {} + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } }; // null matrix -template class DenseStorage +template +class DenseStorage { public: - EIGEN_DEVICE_FUNC DenseStorage() {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {} - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {} - EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {} - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;} - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;} - EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} - EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} - EIGEN_DEVICE_FUNC const T *data() const { return 0; } - EIGEN_DEVICE_FUNC T *data() { return 0; } + static_assert(Rows_ * Cols_ == 0, "The fixed number of rows times columns must equal the storage size."); + EIGEN_DEVICE_FUNC constexpr DenseStorage() {} + EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) {} + EIGEN_DEVICE_FUNC 
constexpr DenseStorage& operator=(const DenseStorage&) { return *this; } + EIGEN_DEVICE_FUNC constexpr DenseStorage(Index,Index,Index) {} + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage& ) {} + EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT {return Rows_;} + EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT {return Cols_;} + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC constexpr void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC constexpr const T *data() const { return 0; } + EIGEN_DEVICE_FUNC constexpr T *data() { return 0; } }; // more specializations for null matrices; these are necessary to resolve ambiguities -template class DenseStorage -: public DenseStorage { }; - -template class DenseStorage -: public DenseStorage { }; - -template class DenseStorage -: public DenseStorage { }; - -// dynamic-size matrix with fixed-size storage -template class DenseStorage -{ - internal::plain_array m_data; +template +class DenseStorage { Index m_rows; Index m_cols; public: EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols) - { - internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data); + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + m_rows = other.m_rows; + m_cols = other.m_cols; + return *this; } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) { + eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + numext::swap(m_rows,other.m_rows); + numext::swap(m_cols,other.m_cols); + } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT {return m_rows;} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC const T *data() const { return nullptr; } + EIGEN_DEVICE_FUNC T *data() { return nullptr; } +}; + +template +class DenseStorage { + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + m_cols = other.m_cols; + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) { + eigen_assert(Rows_ * m_cols == 0 && "The number of rows 
times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + numext::swap(m_cols, other.m_cols); + } + EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { + m_cols = cols; + eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { + m_cols = cols; + eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC const T *data() const { return nullptr; } + EIGEN_DEVICE_FUNC T *data() { return nullptr; } +}; + +template +class DenseStorage { + Index m_rows; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + m_rows = other.m_rows; + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) { + eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + numext::swap(m_rows, other.m_rows); + } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return m_rows;} + EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return Cols_;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { + m_rows = rows; + eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { + m_rows = rows; + eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + } + EIGEN_DEVICE_FUNC const T *data() const { return nullptr; } + EIGEN_DEVICE_FUNC T *data() { return nullptr; } +}; + +// dynamic-size matrix with fixed-size storage +template +class DenseStorage +{ + internal::plain_array m_data; + Index m_rows; + Index m_cols; + public: + EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols) { + internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data); + } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) @@ -320,113 +365,121 @@ template class DenseStorage class DenseStorage +template +class DenseStorage { - internal::plain_array m_data; + internal::plain_array m_data; Index m_rows; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), 
m_rows(other.m_rows) - { - internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data); - } - + EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_rows(0) {} + EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows) { + internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) { m_rows = other.m_rows; - internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data); + internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data); } return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index) : m_rows(rows) {} EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, m_rows * _Cols, other.m_data, other.m_rows * _Cols); + internal::plain_array_helper::swap(m_data, m_rows * Cols_, other.m_data, other.m_rows * Cols_); numext::swap(m_rows, other.m_rows); } - EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;} - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return _Cols;} - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; } - EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; } - EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } - EIGEN_DEVICE_FUNC T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return Cols_; } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage and fixed height -template class DenseStorage +template +class DenseStorage { - internal::plain_array m_data; + internal::plain_array m_data; Index m_cols; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols) - { - internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data); - } + EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_cols(0) {} + EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) + : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols) { + internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data); + } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) { m_cols = 
other.m_cols; - internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data); + internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data); } return *this; } EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {} EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, _Rows * m_cols, other.m_data, _Rows * other.m_cols); + internal::plain_array_helper::swap(m_data, Rows_ * m_cols, other.m_data, Rows_ * other.m_cols); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return _Rows;} - EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;} - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { m_cols = cols; } - EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { m_cols = cols; } - EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } - EIGEN_DEVICE_FUNC T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return Rows_; } + EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } }; // purely dynamic matrix. -template class DenseStorage +template +class DenseStorage { T *m_data; Index m_rows; Index m_cols; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) - : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) - { + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), + m_rows(rows), + m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); - } + } EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) , m_rows(other.m_rows) , m_cols(other.m_cols) { @@ -442,7 +495,6 @@ template class DenseStorage class DenseStorage(m_data, m_rows*m_cols); } + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_data,other.m_data); @@ -473,7 +524,7 @@ template class DenseStorage(m_data, size, m_rows*m_cols); + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*m_cols); m_rows = rows; m_cols = cols; } @@ -481,9 +532,9 @@ template class DenseStorage(m_data, m_rows*m_cols); + internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); + m_data = internal::conditional_aligned_new_auto(size); 
else m_data = 0; EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) @@ -496,25 +547,25 @@ template class DenseStorage class DenseStorage -{ +template +class DenseStorage { T *m_data; Index m_cols; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} - explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) - { + EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_cols(0) {} + explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); + eigen_internal_assert(size==rows*cols && rows==Rows_ && cols >=0); EIGEN_UNUSED_VARIABLE(rows); - } + } EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(_Rows*other.m_cols)) + : m_data(internal::conditional_aligned_new_auto(Rows_*other.m_cols)) , m_cols(other.m_cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows) - internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*Rows_) + internal::smart_copy(other.m_data, other.m_data+Rows_*m_cols, m_data); } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { @@ -525,7 +576,6 @@ template class DenseStorage class DenseStorage(m_data, _Rows*m_cols); } + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, Rows_*m_cols); } EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_data,other.m_data); numext::swap(m_cols,other.m_cols); } - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;} + EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;} EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) { - m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, Rows_*m_cols); m_cols = cols; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) { - if(size != _Rows*m_cols) + if(size != Rows_*m_cols) { - internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); + internal::conditional_aligned_delete_auto(m_data, Rows_*m_cols); if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); + m_data = internal::conditional_aligned_new_auto(size); else m_data = 0; EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) @@ -572,25 +621,26 @@ template class DenseStorage class DenseStorage +template +class DenseStorage { T *m_data; Index m_rows; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} - explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) - { + EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0) {} + explicit constexpr 
DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); + eigen_internal_assert(size==rows*cols && rows>=0 && cols == Cols_); EIGEN_UNUSED_VARIABLE(cols); - } + } EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(other.m_rows*_Cols)) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*Cols_)) , m_rows(other.m_rows) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols) - internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*Cols_) + internal::smart_copy(other.m_data, other.m_data+other.m_rows*Cols_, m_data); } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { @@ -601,7 +651,6 @@ template class DenseStorage class DenseStorage(m_data, _Cols*m_rows); } + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, Cols_*m_rows); } EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_data,other.m_data); numext::swap(m_rows,other.m_rows); } EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;} - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC static constexpr Index cols(void) { return Cols_; } void conservativeResize(Index size, Index rows, Index) { - m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*_Cols); + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*Cols_); m_rows = rows; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) { - if(size != m_rows*_Cols) + if(size != m_rows*Cols_) { - internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); + internal::conditional_aligned_delete_auto(m_data, Cols_*m_rows); if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); + m_data = internal::conditional_aligned_new_auto(size); else m_data = 0; EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) diff --git a/libs/eigen/Eigen/src/Core/Diagonal.h b/libs/eigen/Eigen/src/Core/Diagonal.h index 3112d2c..4af17dd 100644 --- a/libs/eigen/Eigen/src/Core/Diagonal.h +++ b/libs/eigen/Eigen/src/Core/Diagonal.h @@ -11,6 +11,8 @@ #ifndef EIGEN_DIAGONAL_H #define EIGEN_DIAGONAL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class Diagonal @@ -18,8 +20,8 @@ namespace Eigen { * * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix * - * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal - * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. + * \tparam MatrixType the type of the object in which we are taking a sub/main/super diagonal + * \tparam DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. * A positive value means a superdiagonal, a negative value means a subdiagonal. * You can also use DynamicIndex so the index can be set at runtime. 
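// ---------------------------------------------------------------------------
// A minimal usage sketch (not part of the diff) of the accessors documented
// above: a compile-time template index selects a fixed sub/superdiagonal,
// while diagonal(Index) takes the offset at runtime, which is what
// DynamicIndex enables internally.
#include <Eigen/Dense>

inline void diagonal_views(Eigen::Matrix4d& m) {
  auto d0  = m.diagonal();     // main diagonal
  auto d1  = m.diagonal<1>();  // first superdiagonal, index fixed at compile time
  auto dm1 = m.diagonal(-1);   // first subdiagonal, index chosen at runtime
  d1.setZero();                // Diagonal is a writable view into m
  (void)d0; (void)dm1;
}
// ---------------------------------------------------------------------------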
* @@ -38,21 +40,21 @@ struct traits > : traits { typedef typename ref_selector::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; typedef typename MatrixType::StorageKind StorageKind; enum { RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic - : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), - MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + : (plain_enum_min(MatrixType::RowsAtCompileTime - plain_enum_max(-DiagIndex, 0), + MatrixType::ColsAtCompileTime - plain_enum_max( DiagIndex, 0))), ColsAtCompileTime = 1, MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic - : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, - MatrixType::MaxColsAtCompileTime) - : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), - MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + : DiagIndex == DynamicIndex ? min_size_prefer_fixed(MatrixType::MaxRowsAtCompileTime, + MatrixType::MaxColsAtCompileTime) + : (plain_enum_min(MatrixType::MaxRowsAtCompileTime - plain_enum_max(-DiagIndex, 0), + MatrixType::MaxColsAtCompileTime - plain_enum_max( DiagIndex, 0))), MaxColsAtCompileTime = 1, MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions + Flags = (unsigned int)MatrixTypeNested_::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions MatrixTypeOuterStride = outer_stride_at_compile_time::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, OuterStrideAtCompileTime = 0 @@ -60,12 +62,12 @@ struct traits > }; } -template class Diagonal - : public internal::dense_xpr_base< Diagonal >::type +template class Diagonal + : public internal::dense_xpr_base< Diagonal >::type { public: - enum { DiagIndex = _DiagIndex }; + enum { DiagIndex = DiagIndex_ }; typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) @@ -95,11 +97,11 @@ template class Diagonal EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT { return 0; } - typedef typename internal::conditional< - internal::is_lvalue::value, - Scalar, - const Scalar - >::type ScalarWithConstIfNotLvalue; + typedef std::conditional_t< + internal::is_lvalue::value, + Scalar, + const Scalar + > ScalarWithConstIfNotLvalue; EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } @@ -145,7 +147,7 @@ template class Diagonal } EIGEN_DEVICE_FUNC - inline const typename internal::remove_all::type& + inline const internal::remove_all_t& nestedExpression() const { return m_matrix; @@ -191,7 +193,8 @@ MatrixBase::diagonal() /** This is the const version of diagonal(). 
*/ template -EIGEN_DEVICE_FUNC inline typename MatrixBase::ConstDiagonalReturnType +EIGEN_DEVICE_FUNC inline +const typename MatrixBase::ConstDiagonalReturnType MatrixBase::diagonal() const { return ConstDiagonalReturnType(derived()); @@ -209,18 +212,18 @@ MatrixBase::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template -EIGEN_DEVICE_FUNC inline typename MatrixBase::DiagonalDynamicIndexReturnType +EIGEN_DEVICE_FUNC inline Diagonal MatrixBase::diagonal(Index index) { - return DiagonalDynamicIndexReturnType(derived(), index); + return Diagonal(derived(), index); } /** This is the const version of diagonal(Index). */ template -EIGEN_DEVICE_FUNC inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType +EIGEN_DEVICE_FUNC inline const Diagonal MatrixBase::diagonal(Index index) const { - return ConstDiagonalDynamicIndexReturnType(derived(), index); + return Diagonal(derived(), index); } /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this @@ -237,20 +240,20 @@ MatrixBase::diagonal(Index index) const template template EIGEN_DEVICE_FUNC -inline typename MatrixBase::template DiagonalIndexReturnType::Type +inline Diagonal MatrixBase::diagonal() { - return typename DiagonalIndexReturnType::Type(derived()); + return Diagonal(derived()); } /** This is the const version of diagonal(). */ template template EIGEN_DEVICE_FUNC -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +inline const Diagonal MatrixBase::diagonal() const { - return typename ConstDiagonalIndexReturnType::Type(derived()); + return Diagonal(derived()); } } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/DiagonalMatrix.h b/libs/eigen/Eigen/src/Core/DiagonalMatrix.h index 542685c..405cc71 100644 --- a/libs/eigen/Eigen/src/Core/DiagonalMatrix.h +++ b/libs/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -11,9 +11,23 @@ #ifndef EIGEN_DIAGONALMATRIX_H #define EIGEN_DIAGONALMATRIX_H -namespace Eigen { +#include "./InternalHeaderCheck.h" -#ifndef EIGEN_PARSED_BY_DOXYGEN +namespace Eigen { + +/** \class DiagonalBase + * \ingroup Core_Module + * + * \brief Base class for diagonal matrices and expressions + * + * This is the base class that is inherited by diagonal matrix and related expression + * types, which internally use a vector for storing the diagonal entries. Diagonal + * types always represent square matrices. + * + * \tparam Derived is the derived type, a DiagonalMatrix or DiagonalWrapper. + * + * \sa class DiagonalMatrix, class DiagonalWrapper + */ template class DiagonalBase : public EigenBase { @@ -37,24 +51,35 @@ class DiagonalBase : public EigenBase typedef DenseMatrixType DenseType; typedef DiagonalMatrix PlainObject; + /** \returns a const reference to the derived object. */ EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + /** \returns a reference to the derived object. */ EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } + /** + * Constructs a dense matrix from \c *this. Note that this directly returns a dense matrix type, + * not an expression. + * \returns A dense matrix, with its diagonal entries set from the derived object. */ EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } + /** \returns a const reference to the derived object's vector of diagonal coefficients.
*/ EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + /** \returns a reference to the derived object's vector of diagonal coefficients. */ EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return derived().diagonal(); } - EIGEN_DEVICE_FUNC + /** \returns the number of rows. */ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const { return diagonal().size(); } - EIGEN_DEVICE_FUNC + /** \returns the number of columns. */ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const { return diagonal().size(); } + /** \returns the diagonal matrix product of \c *this by the dense matrix, \a matrix */ template EIGEN_DEVICE_FUNC const Product @@ -63,88 +88,99 @@ class DiagonalBase : public EigenBase return Product(derived(),matrix.derived()); } - typedef DiagonalWrapper, const DiagonalVectorType> > InverseReturnType; - EIGEN_DEVICE_FUNC - inline const InverseReturnType - inverse() const - { - return InverseReturnType(diagonal().cwiseInverse()); - } - - EIGEN_DEVICE_FUNC - inline const DiagonalWrapper - operator*(const Scalar& scalar) const - { - return DiagonalWrapper(diagonal() * scalar); - } - EIGEN_DEVICE_FUNC - friend inline const DiagonalWrapper - operator*(const Scalar& scalar, const DiagonalBase& other) - { - return DiagonalWrapper(scalar * other.diagonal()); + template + using DiagonalProductReturnType = DiagonalWrapper; + + /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a other */ + template + EIGEN_DEVICE_FUNC const DiagonalProductReturnType operator*( + const DiagonalBase& other) const { + return diagonal().cwiseProduct(other.diagonal()).asDiagonal(); } - template + using DiagonalInverseReturnType = + DiagonalWrapper, const DiagonalVectorType>>; + + /** \returns the inverse of \c *this. Computed as the coefficient-wise inverse of the diagonal.
*/ EIGEN_DEVICE_FUNC - #ifdef EIGEN_PARSED_BY_DOXYGEN - inline unspecified_expression_type - #else - inline const DiagonalWrapper - #endif - operator+(const DiagonalBase& other) const - { + inline const DiagonalInverseReturnType inverse() const { return diagonal().cwiseInverse().asDiagonal(); } + + using DiagonalScaleReturnType = + DiagonalWrapper; + + /** \returns the product of \c *this by the scalar \a scalar */ + EIGEN_DEVICE_FUNC + inline const DiagonalScaleReturnType operator*(const Scalar& scalar) const { + return (diagonal() * scalar).asDiagonal(); + } + + using ScaleDiagonalReturnType = + DiagonalWrapper; + + /** \returns the product of a scalar and the diagonal matrix \a other */ + EIGEN_DEVICE_FUNC + friend inline const ScaleDiagonalReturnType operator*(const Scalar& scalar, const DiagonalBase& other) { + return (scalar * other.diagonal()).asDiagonal(); + } + + template + using DiagonalSumReturnType = DiagonalWrapper; + + /** \returns the sum of \c *this and the diagonal matrix \a other */ + template + EIGEN_DEVICE_FUNC inline const DiagonalSumReturnType operator+( + const DiagonalBase& other) const { return (diagonal() + other.diagonal()).asDiagonal(); } - template - EIGEN_DEVICE_FUNC - #ifdef EIGEN_PARSED_BY_DOXYGEN - inline unspecified_expression_type - #else - inline const DiagonalWrapper - #endif - operator-(const DiagonalBase& other) const - { + template + using DiagonalDifferenceReturnType = DiagonalWrapper; + + /** \returns the difference of \c *this and the diagonal matrix \a other */ + template + EIGEN_DEVICE_FUNC inline const DiagonalDifferenceReturnType operator-( + const DiagonalBase& other) const { return (diagonal() - other.diagonal()).asDiagonal(); } }; -#endif - /** \class DiagonalMatrix - * \ingroup Core_Module - * - * \brief Represents a diagonal matrix with its storage - * - * \param _Scalar the type of coefficients - * \param SizeAtCompileTime the dimension of the matrix, or Dynamic - * \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults - * to SizeAtCompileTime. Most of the time, you do not need to specify it. - * - * \sa class DiagonalWrapper - */ + * \ingroup Core_Module + * + * \brief Represents a diagonal matrix with its storage + * + * \tparam Scalar_ the type of coefficients + * \tparam SizeAtCompileTime the dimension of the matrix, or Dynamic + * \tparam MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults + * to SizeAtCompileTime. Most of the time, you do not need to specify it. 
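// ---------------------------------------------------------------------------
// A short sketch (not part of the diff) of the DiagonalBase operators defined
// above: products, sums, differences, scalings and inverses of diagonal
// matrices stay diagonal and are computed coefficient-wise on the stored
// diagonal vectors. The operator* overload for two diagonal operands is new
// in this diff; the expected values below follow from its cwiseProduct-based
// definition.
#include <Eigen/Dense>

inline void diagonal_algebra() {
  Eigen::DiagonalMatrix<double, 3> d1(1.0, 2.0, 4.0);
  Eigen::DiagonalMatrix<double, 3> d2(2.0, 2.0, 2.0);
  Eigen::Vector3d prod  = (d1 * d2).diagonal();     // 2, 4, 8
  Eigen::Vector3d sum   = (d1 + d2).diagonal();     // 3, 4, 6
  Eigen::Vector3d inv   = d1.inverse().diagonal();  // 1, 0.5, 0.25
  Eigen::Vector3d twice = (2.0 * d1).diagonal();    // 2, 4, 8
  (void)prod; (void)sum; (void)inv; (void)twice;
}
// ---------------------------------------------------------------------------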
+ * + * \sa class DiagonalBase, class DiagonalWrapper + */ namespace internal { -template -struct traits > - : traits > +template +struct traits > + : traits > { - typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; + typedef Matrix DiagonalVectorType; typedef DiagonalShape StorageKind; enum { - Flags = LvalueBit | NoPreferredStorageOrderBit + Flags = LvalueBit | NoPreferredStorageOrderBit | NestByRefBit }; }; } -template +template class DiagonalMatrix - : public DiagonalBase > + : public DiagonalBase > { public: #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; typedef const DiagonalMatrix& Nested; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::StorageIndex StorageIndex; #endif @@ -178,10 +214,7 @@ class DiagonalMatrix EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {} - #if EIGEN_HAS_CXX11 - /** \brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients. \cpp11 - * - * There exists C++98 anologue constructors for fixed-size diagonal matrices having 2 or 3 coefficients. + /** \brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients. * * \warning To construct a diagonal matrix of fixed size, the number of values passed to this * constructor must match the fixed dimension of \c *this. @@ -200,7 +233,10 @@ class DiagonalMatrix EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE DiagonalMatrix(const std::initializer_list>& list) : m_diagonal(list) {} - #endif // EIGEN_HAS_CXX11 + + /** \brief Constructs a DiagonalMatrix from an r-value diagonal vector type */ + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {} /** Copy constructor. */ template @@ -239,6 +275,22 @@ class DiagonalMatrix } #endif + typedef DiagonalWrapper, DiagonalVectorType>> + InitializeReturnType; + + /** Initializes a diagonal matrix of size SizeAtCompileTime with coefficients set to zero */ + EIGEN_DEVICE_FUNC + static const InitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); } + /** Initializes a diagonal matrix of size dim with coefficients set to zero */ + EIGEN_DEVICE_FUNC + static const InitializeReturnType Zero(Index size) { return DiagonalVectorType::Zero(size).asDiagonal(); } + /** Initializes an identity matrix of size SizeAtCompileTime */ + EIGEN_DEVICE_FUNC + static const InitializeReturnType Identity() { return DiagonalVectorType::Ones().asDiagonal(); } + /** Initializes an identity matrix of size dim */ + EIGEN_DEVICE_FUNC + static const InitializeReturnType Identity(Index size) { return DiagonalVectorType::Ones(size).asDiagonal(); } + /** Resizes to given size. */ EIGEN_DEVICE_FUNC inline void resize(Index size) { m_diagonal.resize(size); } @@ -261,7 +313,7 @@ class DiagonalMatrix * * \brief Expression of a diagonal matrix * - * \param _DiagonalVectorType the type of the vector of diagonal coefficients + * \tparam DiagonalVectorType_ the type of the vector of diagonal coefficients * * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients, * instead wrapping an existing vector expression.
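// ---------------------------------------------------------------------------
// A sketch (not part of the diff) of the DiagonalMatrix factories added above.
// Zero() and Identity() are assumed to behave as their one-line definitions
// suggest: they wrap a zero/ones nullary vector expression in a
// DiagonalWrapper. asDiagonal(), the subject of the DiagonalWrapper section
// that follows, wraps an existing vector without copying it.
#include <Eigen/Dense>

inline void diagonal_factories() {
  auto z = Eigen::DiagonalMatrix<double, 3>::Zero();      // diag(0, 0, 0)
  auto i = Eigen::DiagonalMatrix<double, 3>::Identity();  // diag(1, 1, 1)
  Eigen::Vector3d v(1.0, 2.0, 3.0);
  Eigen::Matrix3d dense = v.asDiagonal();  // materialize the wrapper as dense
  (void)z; (void)i; (void)dense;
}
// ---------------------------------------------------------------------------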
It is the return type of MatrixBase::asDiagonal() @@ -271,10 +323,10 @@ class DiagonalMatrix */ namespace internal { -template -struct traits > +template +struct traits > { - typedef _DiagonalVectorType DiagonalVectorType; + typedef DiagonalVectorType_ DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::StorageIndex StorageIndex; typedef DiagonalShape StorageKind; @@ -289,13 +341,13 @@ struct traits > }; } -template +template class DiagonalWrapper - : public DiagonalBase >, internal::no_assignment_operator + : public DiagonalBase >, internal::no_assignment_operator { public: #ifndef EIGEN_PARSED_BY_DOXYGEN - typedef _DiagonalVectorType DiagonalVectorType; + typedef DiagonalVectorType_ DiagonalVectorType; typedef DiagonalWrapper Nested; #endif @@ -386,6 +438,6 @@ struct Assignment } // namespace internal -} // end namespace Eigen +} // end namespace Eigen #endif // EIGEN_DIAGONALMATRIX_H diff --git a/libs/eigen/Eigen/src/Core/DiagonalProduct.h b/libs/eigen/Eigen/src/Core/DiagonalProduct.h index 7911d1c..3cd34ba 100644 --- a/libs/eigen/Eigen/src/Core/DiagonalProduct.h +++ b/libs/eigen/Eigen/src/Core/DiagonalProduct.h @@ -11,6 +11,8 @@ #ifndef EIGEN_DIAGONALPRODUCT_H #define EIGEN_DIAGONALPRODUCT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. diff --git a/libs/eigen/Eigen/src/Core/Dot.h b/libs/eigen/Eigen/src/Core/Dot.h index 5c3441b..0c13192 100644 --- a/libs/eigen/Eigen/src/Core/Dot.h +++ b/libs/eigen/Eigen/src/Core/Dot.h @@ -10,6 +10,8 @@ #ifndef EIGEN_DOT_H #define EIGEN_DOT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -18,14 +20,9 @@ namespace internal { // with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE // looking at the static assertions. Thus this is a trick to get better compile errors. 
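// ---------------------------------------------------------------------------
// A sketch (not part of the diff) of what the NeedToTranspose machinery below
// buys at the user level: dot() accepts any mix of row and column vectors of
// equal length, inserting the (conjugate) transpose itself, while genuinely
// mismatched operands hit a readable static assertion instead of a deep
// instantiation error.
#include <Eigen/Dense>

inline double dot_examples() {
  Eigen::Vector3d    col(1.0, 2.0, 3.0);
  Eigen::RowVector3d row(4.0, 5.0, 6.0);
  double a = col.dot(col);  // column . column, the straightforward case
  double b = col.dot(row);  // column . row, handled by the transpose branch
  return a + b;             // 14 + 32
}
// ---------------------------------------------------------------------------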
template + bool NeedToTranspose = T::IsVectorAtCompileTime && U::IsVectorAtCompileTime && + ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1) || + (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))> struct dot_nocheck { typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; @@ -123,8 +120,8 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::normalized() const { - typedef typename internal::nested_eval::type _Nested; - _Nested n(derived()); + typedef typename internal::nested_eval::type Nested_; + Nested_ n(derived()); RealScalar z = n.squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU if(z>RealScalar(0)) @@ -166,8 +163,8 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::stableNormalized() const { - typedef typename internal::nested_eval::type _Nested; - _Nested n(derived()); + typedef typename internal::nested_eval::type Nested_; + Nested_ n(derived()); RealScalar w = n.cwiseAbs().maxCoeff(); RealScalar z = (n/w).squaredNorm(); if(z>RealScalar(0)) diff --git a/libs/eigen/Eigen/src/Core/EigenBase.h b/libs/eigen/Eigen/src/Core/EigenBase.h index 6b3c7d3..105488d 100644 --- a/libs/eigen/Eigen/src/Core/EigenBase.h +++ b/libs/eigen/Eigen/src/Core/EigenBase.h @@ -11,6 +11,8 @@ #ifndef EIGEN_EIGENBASE_H #define EIGEN_EIGENBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class EigenBase diff --git a/libs/eigen/Eigen/src/Core/ForceAlignedAccess.h b/libs/eigen/Eigen/src/Core/ForceAlignedAccess.h index 817a43a..b00785e 100644 --- a/libs/eigen/Eigen/src/Core/ForceAlignedAccess.h +++ b/libs/eigen/Eigen/src/Core/ForceAlignedAccess.h @@ -10,6 +10,8 @@ #ifndef EIGEN_FORCEALIGNEDACCESS_H #define EIGEN_FORCEALIGNEDACCESS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class ForceAlignedAccess @@ -128,7 +130,7 @@ MatrixBase::forceAlignedAccess() */ template template -inline typename internal::add_const_on_value_type,Derived&>::type>::type +inline add_const_on_value_type_t,Derived&>> MatrixBase::forceAlignedAccessIf() const { return derived(); // FIXME This should not work but apparently is never used @@ -139,7 +141,7 @@ MatrixBase::forceAlignedAccessIf() const */ template template -inline typename internal::conditional,Derived&>::type +inline std::conditional_t,Derived&> MatrixBase::forceAlignedAccessIf() { return derived(); // FIXME This should not work but apparently is never used diff --git a/libs/eigen/Eigen/src/Core/Fuzzy.h b/libs/eigen/Eigen/src/Core/Fuzzy.h index 43aa49b..b16b2da 100644 --- a/libs/eigen/Eigen/src/Core/Fuzzy.h +++ b/libs/eigen/Eigen/src/Core/Fuzzy.h @@ -11,6 +11,8 @@ #ifndef EIGEN_FUZZY_H #define EIGEN_FUZZY_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal diff --git a/libs/eigen/Eigen/src/Core/GeneralProduct.h b/libs/eigen/Eigen/src/Core/GeneralProduct.h index 6906aa7..661a3c4 100644 --- a/libs/eigen/Eigen/src/Core/GeneralProduct.h +++ b/libs/eigen/Eigen/src/Core/GeneralProduct.h @@ -11,6 +11,8 @@ #ifndef EIGEN_GENERAL_PRODUCT_H #define EIGEN_GENERAL_PRODUCT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { enum { @@ -50,17 +52,17 @@ template struct product_size_category template struct product_type { - typedef typename remove_all::type _Lhs; - typedef typename remove_all::type _Rhs; + typedef remove_all_t Lhs_; + typedef remove_all_t Rhs_; enum { - MaxRows = 
traits<_Lhs>::MaxRowsAtCompileTime, - Rows = traits<_Lhs>::RowsAtCompileTime, - MaxCols = traits<_Rhs>::MaxColsAtCompileTime, - Cols = traits<_Rhs>::ColsAtCompileTime, - MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, - traits<_Rhs>::MaxRowsAtCompileTime), - Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, - traits<_Rhs>::RowsAtCompileTime) + MaxRows = traits::MaxRowsAtCompileTime, + Rows = traits::RowsAtCompileTime, + MaxCols = traits::MaxColsAtCompileTime, + Cols = traits::ColsAtCompileTime, + MaxDepth = min_size_prefer_fixed(traits::MaxColsAtCompileTime, + traits::MaxRowsAtCompileTime), + Depth = min_size_prefer_fixed(traits::ColsAtCompileTime, + traits::RowsAtCompileTime) }; // the splitting into different lines of code here, introducing the _select enums and the typedef below, @@ -180,12 +182,13 @@ struct gemv_static_vector_if PacketSize = internal::packet_traits::size }; #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 - internal::plain_array m_data; + internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } #else // Some architectures cannot align on the stack, // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. - internal::plain_array m_data; + internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment ? reinterpret_cast((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) @@ -216,14 +219,13 @@ template<> struct gemv_dense_selector typedef typename Lhs::Scalar LhsScalar; typedef typename Rhs::Scalar RhsScalar; typedef typename Dest::Scalar ResScalar; - typedef typename Dest::RealScalar RealScalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; + typedef Map, plain_enum_min(AlignedMax, internal::packet_traits::size)> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); @@ -231,7 +233,7 @@ template<> struct gemv_dense_selector ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); // make sure Dest is a compile-time vector type (bug 1166) - typedef typename conditional::type ActualDest; + typedef std::conditional_t ActualDest; enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -261,7 +263,7 @@ template<> struct gemv_dense_selector { gemv_static_vector_if static_dest; - const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool alphaIsCompatible = (!ComplexByReal) || (numext::is_exactly_zero(numext::imag(actualAlpha))); const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), @@ -314,10 +316,10 @@ template<> struct gemv_dense_selector typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type ActualRhsTypeCleaned; + typedef internal::remove_all_t ActualRhsTypeCleaned; - typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); - typename add_const::type actualRhs = 
RhsBlasTraits::extract(rhs); + std::add_const_t actualLhs = LhsBlasTraits::extract(lhs); + std::add_const_t actualRhs = RhsBlasTraits::extract(rhs); ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); diff --git a/libs/eigen/Eigen/src/Core/GenericPacketMath.h b/libs/eigen/Eigen/src/Core/GenericPacketMath.h index cf677a1..af773dd 100644 --- a/libs/eigen/Eigen/src/Core/GenericPacketMath.h +++ b/libs/eigen/Eigen/src/Core/GenericPacketMath.h @@ -11,6 +11,8 @@ #ifndef EIGEN_GENERIC_PACKET_MATH_H #define EIGEN_GENERIC_PACKET_MATH_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -57,12 +59,14 @@ struct default_packet_traits HasMax = 1, HasConj = 1, HasSetLinear = 1, + HasSign = 1, HasBlend = 0, // This flag is used to indicate whether packet comparison is supported. // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true. HasCmp = 0, HasDiv = 0, + HasReciprocal = 0, HasSqrt = 0, HasRsqrt = 0, HasExp = 0, @@ -98,8 +102,7 @@ struct default_packet_traits HasRound = 0, HasRint = 0, HasFloor = 0, - HasCeil = 0, - HasSign = 0 + HasCeil = 0 }; }; @@ -160,7 +163,7 @@ struct eigen_packet_wrapper { EIGEN_ALWAYS_INLINE operator T&() { return m_val; } EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; } - EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {} + EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default; EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {} EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) { m_val = v; @@ -176,7 +179,7 @@ struct eigen_packet_wrapper */ template struct is_scalar { - typedef typename unpacket_traits::type Scalar; + using Scalar = typename unpacket_traits::type; enum { value = internal::is_same::value }; @@ -217,6 +220,15 @@ padd(const Packet& a, const Packet& b) { return a+b; } template<> EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) { return a || b; } +/** \internal \returns a packet version of \a *from, (un-aligned masked add) + * There is no generic implementation. We only have implementations for specialized + * cases. Generic case should not be called. + */ +template EIGEN_DEVICE_FUNC inline +std::enable_if_t::masked_fpops_available, Packet> +padd(const Packet& a, const Packet& b, typename unpacket_traits::mask_t umask); + + /** \internal \returns a - b (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) { return a-b; } @@ -259,7 +271,7 @@ struct ptrue_impl { // have another option, since the scalar type requires initialization. template struct ptrue_impl::value && NumTraits::RequireInitialization>::type > { + std::enable_if_t::value && NumTraits::RequireInitialization> > { static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){ return T(1); } @@ -285,7 +297,7 @@ struct pzero_impl { // for zero may not consist of all-zero bits. 
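The remark above is easiest to see with a scalar type whose zero is not the all-zero bit pattern; for such types pzero_impl must construct T(0) by value rather than memset the bits. A minimal sketch (BigNum is hypothetical, not an Eigen type):

    // Hypothetical user-defined scalar: a valid value, including zero, always
    // holds a non-null buffer pointer, so an all-zero-bits object would be broken.
    struct BigNum {
      explicit BigNum(int v) : digits(new int[1]{v}) {}
      int* digits;  // never null for a valid BigNum, even when the value is 0
    };
    // For scalars like this, the value-based fallback below returns BigNum(0)
    // instead of writing an all-zero byte pattern.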
template struct pzero_impl::value>::type> { + std::enable_if_t::value>> { static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); } @@ -356,16 +368,16 @@ struct bytewise_bitwise_helper { EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return binary(a, b, bit_and()); } - EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { + EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or()); } EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return binary(a, b, bit_xor()); } - EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { + EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a,bit_not()); } - + private: template EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) { @@ -398,8 +410,8 @@ struct bitwise_helper : public bytewise_bitwise_helper {}; // For integers or non-trivial scalars, use binary operators. template struct bitwise_helper::value && (NumTraits::IsInteger || NumTraits::RequireInitialization)>::type + typename std::enable_if_t< + is_scalar::value && (NumTraits::IsInteger || NumTraits::RequireInitialization)> > : public operator_bitwise_helper {}; /** \internal \returns the bitwise and of \a a and \a b */ @@ -441,7 +453,7 @@ struct pselect_impl { // For scalars, use ternary select. template struct pselect_impl::value>::type > { + std::enable_if_t::value> > { static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) { return numext::equal_strict(mask, Packet(0)) ? b : a; } @@ -551,13 +563,13 @@ template EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) { using numext::arg; return arg(a); } -/** \internal \returns \a a logically shifted by N bits to the right */ +/** \internal \returns \a a arithmetically shifted by N bits to the right */ template EIGEN_DEVICE_FUNC inline int parithmetic_shift_right(const int& a) { return a >> N; } template EIGEN_DEVICE_FUNC inline long int parithmetic_shift_right(const long int& a) { return a >> N; } -/** \internal \returns \a a arithmetically shifted by N bits to the right */ +/** \internal \returns \a a logically shifted by N bits to the right */ template EIGEN_DEVICE_FUNC inline int plogical_shift_right(const int& a) { return static_cast(static_cast(a) >> N); } template EIGEN_DEVICE_FUNC inline long int @@ -594,20 +606,52 @@ pldexp(const Packet &a, const Packet &exponent) { template EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); } -/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ +/** \internal \returns a packet version of \a *from, from must be properly aligned */ template EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits::type* from) { return *from; } +/** \internal \returns n elements of a packet version of \a *from, from must be properly aligned + * offset indicates the starting element in which to load and + * offset + n <= unpacket_traits::size + * All elements before offset and after the last element loaded will initialized with zero */ +template EIGEN_DEVICE_FUNC inline Packet +pload_partial(const typename unpacket_traits::type* from, const Index n, const Index offset = 0) +{ + const Index packet_size = unpacket_traits::size; + eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet"); + typedef typename unpacket_traits::type Scalar; + EIGEN_ALIGN_MAX Scalar 
elements[packet_size] = { Scalar(0) };
+  for (Index i = offset; i < numext::mini(n+offset,packet_size); i++) {
+    elements[i] = from[i-offset];
+  }
+  return pload<Packet>(elements);
+}
+
 /** \internal \returns a packet version of \a *from, (un-aligned load) */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }

+/** \internal \returns n elements of a packet version of \a *from, (un-aligned load)
+  * All elements after the last element loaded will be initialized with zero */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n)
+{
+  const Index packet_size = unpacket_traits<Packet>::size;
+  eigen_assert(n <= packet_size && "number of elements will read past end of packet");
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
+  for (Index i = 0; i < numext::mini(n,packet_size); i++) {
+    elements[i] = from[i];
+  }
+  return pload<Packet>(elements);
+}
+
 /** \internal \returns a packet version of \a *from, (un-aligned masked load)
   * There is no generic implementation. We only have implementations for specialized
   * cases. Generic case should not be called.
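As a usage sketch of the partial load/store pair (assuming an SSE or NEON build where the internal 4-lane float type Packet4f exists; these are internal APIs and may change):

    #include <Eigen/Core>

    int main() {
      using namespace Eigen::internal;
      float src[3] = {1.f, 2.f, 3.f};
      float dst[3] = {0.f, 0.f, 0.f};
      // Load 3 of the 4 lanes; the trailing lane is zero-filled, as documented.
      Packet4f p = ploadu_partial<Packet4f>(src, /*n=*/3);
      // Store exactly 3 lanes back, so nothing is written past dst[2].
      pstoreu_partial(dst, p, /*n=*/3);
    }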
*/ template EIGEN_DEVICE_FUNC inline -typename enable_if::masked_store_available, void>::type +std::enable_if_t::masked_store_available, void> pstoreu(Scalar* to, const Packet& from, typename unpacket_traits::mask_t umask); - template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) - { return ploadu(from); } +template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) +{ return ploadu(from); } - template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) - { pstore(to, from); } +template EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) +{ + const Index packet_size = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) }; + for (Index i = 0; i < numext::mini(n,packet_size); i++) { + elements[i] = from[i*stride]; + } + return pload(elements); +} + +template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) +{ pstore(to, from); } + +template EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) +{ + const Index packet_size = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar elements[packet_size]; + pstore(elements, from); + for (Index i = 0; i < numext::mini(n,packet_size); i++) { + to[i*stride] = elements[i]; + } +} /** \internal tries to do cache prefetching of \a addr */ template EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) @@ -807,20 +897,13 @@ Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) { typedef typename internal::unpacket_traits::type Scalar; - return pmul(pset1(Scalar(EIGEN_LOG2E)), plog(a)); + return pmul(pset1(Scalar(EIGEN_LOG2E)), plog(a)); } /** \internal \returns the square-root of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) { return numext::sqrt(a); } -/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet prsqrt(const Packet& a) { - typedef typename internal::unpacket_traits::type Scalar; - return pdiv(pset1(Scalar(1)), psqrt(a)); -} - /** \internal \returns the rounded value of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pround(const Packet& a) { using numext::round; return round(a); } @@ -838,6 +921,24 @@ Packet print(const Packet& a) { using numext::rint; return rint(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } +template +struct psign_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { + return numext::sign(a); + } +}; + +/** \internal \returns the sign of \a a (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +psign(const Packet& a) { + return psign_impl::run(a); +} + +template<> EIGEN_DEVICE_FUNC inline bool +psign(const bool& a) { + return a; +} + /** \internal \returns the first element of a packet */ template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type @@ -849,7 +950,7 @@ pfirst(const Packet& a) * For packet-size smaller or equal to 4, this boils down to a noop. 
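The strided variants follow the same pattern; a plain-array stand-in for a 4-lane packet shows the access pattern pgather_partial implements (a sketch only, not Eigen code):

    #include <array>

    // Read n elements spaced `stride` apart, zero-fill the remaining lanes,
    // mirroring the generic pgather_partial above.
    template <int N>
    std::array<float, N> pgather_partial_sketch(const float* from, int stride, int n) {
      std::array<float, N> out{};          // zero-filled, like the packet version
      for (int i = 0; i < n && i < N; ++i)
        out[i] = from[i * stride];         // strided reads
      return out;
    }

    // e.g. pgather_partial_sketch<4>(rowPtr, /*stride=*/rows, /*n=*/3) collects
    // three elements of a row out of a column-major matrix.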
*/ template -EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type +EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet> predux_half_dowto4(const Packet& a) { return a; } @@ -881,7 +982,7 @@ predux(const Packet& a) template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul( const Packet& a) { - typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type Scalar; return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul))); } @@ -889,14 +990,14 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul( template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min( const Packet &a) { - typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type Scalar; return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); } template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min( const Packet& a) { - typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type Scalar; return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); } @@ -904,14 +1005,14 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min( template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max( const Packet &a) { - typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type Scalar; return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); } template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max( const Packet& a) { - typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type Scalar; return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); } @@ -943,6 +1044,35 @@ template EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ +// FMA instructions. +/** \internal \returns a * b + c (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, + const Packet& c) { + return padd(pmul(a, b), c); +} + +/** \internal \returns a * b - c (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, + const Packet& c) { + return psub(pmul(a, b), c); +} + +/** \internal \returns -(a * b) + c (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, + const Packet& c) { + return padd(pnegate(pmul(a, b)), c); +} + +/** \internal \returns -(a * b) - c (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, + const Packet& c) { + return psub(pnegate(pmul(a, b)), c); +} + /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */ // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) template @@ -951,13 +1081,6 @@ inline void pstore1(typename unpacket_traits::type* to, const typename u pstore(to, pset1(a)); } -/** \internal \returns a * b + c (coeff-wise) */ -template EIGEN_DEVICE_FUNC inline Packet -pmadd(const Packet& a, - const Packet& b, - const Packet& c) -{ return padd(pmul(a, b),c); } - /** \internal \returns a packet version of \a *from. 
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */ template @@ -969,6 +1092,17 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_trai return ploadu(from); } +/** \internal \returns n elements of a packet version of \a *from. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits::type* from, const Index n, const Index offset = 0) +{ + if(Alignment >= unpacket_traits::alignment) + return pload_partial(from, n, offset); + else + return ploadu_partial(from, n); +} + /** \internal copy the packet \a from to \a *to. * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ template @@ -980,6 +1114,17 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro pstoreu(to, from); } +/** \internal copy n elements of the packet \a from to \a *to. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) +{ + if(Alignment >= unpacket_traits::alignment) + pstore_partial(to, from, n, offset); + else + pstoreu_partial(to, from, n); +} + /** \internal \returns a packet version of \a *from. * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the * hardware if available to speedup the loading of data that won't be modified @@ -1033,6 +1178,47 @@ pblend(const Selector::size>& ifPacket, const Packet& th return ifPacket.select[0] ? thenPacket : elsePacket; } +/** \internal \returns 1 / a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) { + using Scalar = typename unpacket_traits::type; + return pdiv(pset1(Scalar(1)), a); +} + +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet prsqrt(const Packet& a) { + return preciprocal(psqrt(a)); +} + +template ::value, + bool IsInteger = NumTraits::type>::IsInteger> + struct psignbit_impl; +template +struct psignbit_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); } +}; +template +struct psignbit_impl { + // generic implementation if not specialized in PacketMath.h + // slower than arithmetic shift + typedef typename unpacket_traits::type Scalar; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) { + const Packet cst_pos_one = pset1(Scalar(1)); + const Packet cst_neg_one = pset1(Scalar(-1)); + return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one); + } +}; +template +struct psignbit_impl { + // generic implementation for integer packets + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); } +}; +/** \internal \returns the sign bit of \a a as a bitmask*/ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet +psignbit(const Packet& a) { return psignbit_impl::run(a); } + } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/GlobalFunctions.h b/libs/eigen/Eigen/src/Core/GlobalFunctions.h index 629af94..18792cb 100644 --- a/libs/eigen/Eigen/src/Core/GlobalFunctions.h +++ b/libs/eigen/Eigen/src/Core/GlobalFunctions.h @@ -51,6 +51,8 @@ } \ }; +#include "./InternalHeaderCheck.h" + namespace Eigen { 
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real) @@ -66,11 +68,9 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh) -#if EIGEN_HAS_CXX11_MATH EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asinh,scalar_asinh_op,inverse hyperbolic sine,\sa ArrayBase::asinh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acosh,scalar_acosh_op,inverse hyperbolic cosine,\sa ArrayBase::acosh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atanh,scalar_atanh_op,inverse hyperbolic tangent,\sa ArrayBase::atanh) -#endif EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic,scalar_logistic_op,logistic function,\sa ArrayBase::logistic) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) @@ -99,31 +99,31 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) + template + using GlobalUnaryPowReturnType = std::enable_if_t< + !internal::is_arithmetic::Real>::value && + internal::is_arithmetic::Real>::value, + CwiseUnaryOp, const Derived> >; + /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. - * - * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). - * - * \sa ArrayBase::pow() - * - * \relates ArrayBase - */ + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given + * expression (\c Derived::Scalar). 
+ * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ #ifdef EIGEN_PARSED_BY_DOXYGEN - template - inline const CwiseBinaryOp,Derived,Constant > - pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent); + template + EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType pow( + const Eigen::ArrayBase& x, const ScalarExponent& exponent); #else - template - EIGEN_DEVICE_FUNC inline - EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE( - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg::type,pow)) - pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) - { - typedef typename internal::promote_scalar_arg::type PromotedExponent; - return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedExponent,pow)(x.derived(), - typename internal::plain_constant_type::type(x.derived().rows(), x.derived().cols(), internal::scalar_constant_op(exponent))); + template + EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType pow( + const Eigen::ArrayBase& x, const ScalarExponent& exponent) { + return GlobalUnaryPowReturnType( + x.derived(), internal::scalar_unary_pow_op(exponent)); } #endif @@ -168,10 +168,9 @@ namespace Eigen #else template EIGEN_DEVICE_FUNC inline - EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE( const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg::type,Derived,pow)) + EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar)>::type,Derived,pow) pow(const Scalar& x, const Eigen::ArrayBase& exponents) { typedef typename internal::promote_scalar_arg::type PromotedScalar; @@ -180,6 +179,25 @@ namespace Eigen } #endif + /** \returns an expression of the coefficient-wise atan2(\a x, \a y). \a x and \a y must be of the same type. + * + * This function computes the coefficient-wise atan2(). 
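Together with the coefficient-wise atan2 declared in this same hunk, the scalar-exponent pow path can be exercised as follows (a sketch against this revision of the library):

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      Eigen::ArrayXf x(3), y(3);
      x << 1.f, 2.f, 3.f;
      y << 3.f, 2.f, 1.f;
      std::cout << Eigen::pow(x, 2.f) << "\n";   // coefficient-wise x^2, via the new unary-op path
      std::cout << Eigen::atan2(x, y) << "\n";   // coefficient-wise atan2; both arrays must share a scalar type
    }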
+ * + * \sa ArrayBase::atan2() + * + * \relates ArrayBase + */ + template + inline const std::enable_if_t< + std::is_same::value, + Eigen::CwiseBinaryOp, const LhsDerived, const RhsDerived> + > + atan2(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { + return Eigen::CwiseBinaryOp, const LhsDerived, const RhsDerived>( + x.derived(), + exponents.derived() + ); + } namespace internal { diff --git a/libs/eigen/Eigen/src/Core/IO.h b/libs/eigen/Eigen/src/Core/IO.h index e81c315..897d7b0 100644 --- a/libs/eigen/Eigen/src/Core/IO.h +++ b/libs/eigen/Eigen/src/Core/IO.h @@ -11,6 +11,8 @@ #ifndef EIGEN_IO_H #define EIGEN_IO_H +#include "./InternalHeaderCheck.h" + namespace Eigen { enum { DontAlignCols = 1 }; @@ -131,7 +133,6 @@ template std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt) { using internal::is_same; - using internal::conditional; if(_m.size() == 0) { @@ -141,22 +142,21 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& typename Derived::Nested m = _m; typedef typename Derived::Scalar Scalar; - typedef typename - conditional< + typedef std::conditional_t< is_same::value || is_same::value || is_same::value || is_same::value, int, - typename conditional< + std::conditional_t< is_same >::value || is_same >::value || is_same >::value || is_same >::value, std::complex, const Scalar& - >::type - >::type PrintType; + > + > PrintType; Index width = 0; diff --git a/libs/eigen/Eigen/src/Core/IndexedView.h b/libs/eigen/Eigen/src/Core/IndexedView.h index 0847625..f967301 100644 --- a/libs/eigen/Eigen/src/Core/IndexedView.h +++ b/libs/eigen/Eigen/src/Core/IndexedView.h @@ -10,6 +10,8 @@ #ifndef EIGEN_INDEXED_VIEW_H #define EIGEN_INDEXED_VIEW_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -21,8 +23,8 @@ struct traits > enum { RowsAtCompileTime = int(array_size::value), ColsAtCompileTime = int(array_size::value), - MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : Dynamic, - MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : Dynamic, + MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 @@ -40,10 +42,10 @@ struct traits > InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime, IsBlockAlike = InnerIncr==1 && OuterIncr==1, - IsInnerPannel = HasSameStorageOrderAsXprType && is_same,typename conditional::type>::value, + IsInnerPannel = HasSameStorageOrderAsXprType && is_same,std::conditional_t>::value, - InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic ? Dynamic : XprInnerStride * InnerIncr, - OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic ? Dynamic : XprOuterstride * OuterIncr, + InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic || InnerIncr==UndefinedIncr ? Dynamic : XprInnerStride * InnerIncr, + OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic || OuterIncr==UndefinedIncr ? 
Dynamic : XprOuterstride * OuterIncr, ReturnAsScalar = is_same::value && is_same::value, ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, @@ -96,7 +98,7 @@ class IndexedViewImpl; * - decltype(ArrayXi::LinSpaced(...)) * - Any view/expressions of the previous types * - Eigen::ArithmeticSequence - * - Eigen::internal::AllRange (helper for Eigen::all) + * - Eigen::internal::AllRange (helper for Eigen::placeholders::all) * - Eigen::internal::SingleRange (helper for single index) * - etc. * @@ -114,7 +116,7 @@ public: EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView) typedef typename internal::ref_selector::non_const_type MatrixTypeNested; - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; template IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) @@ -122,17 +124,17 @@ public: {} /** \returns number of rows */ - Index rows() const { return internal::size(m_rowIndices); } + Index rows() const { return internal::index_list_size(m_rowIndices); } /** \returns number of columns */ - Index cols() const { return internal::size(m_colIndices); } + Index cols() const { return internal::index_list_size(m_colIndices); } /** \returns the nested expression */ - const typename internal::remove_all::type& + const internal::remove_all_t& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ - typename internal::remove_reference::type& + std::remove_reference_t& nestedExpression() { return m_xpr; } /** \returns a const reference to the object storing/generating the row indices */ @@ -189,12 +191,16 @@ struct unary_evaluator, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() + && m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() + && m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } @@ -204,6 +210,8 @@ struct unary_evaluator, IndexBased> EIGEN_STATIC_ASSERT_LVALUE(XprType) Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; Index col = XprType::RowsAtCompileTime == 1 ? index : 0; + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() + && m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } @@ -212,6 +220,8 @@ struct unary_evaluator, IndexBased> { Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; Index col = XprType::RowsAtCompileTime == 1 ? index : 0; + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() + && m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } @@ -220,6 +230,8 @@ struct unary_evaluator, IndexBased> { Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; Index col = XprType::RowsAtCompileTime == 1 ? 
index : 0; + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() + && m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); return m_argImpl.coeff( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } diff --git a/libs/eigen/Eigen/src/Core/InternalHeaderCheck.h b/libs/eigen/Eigen/src/Core/InternalHeaderCheck.h new file mode 100644 index 0000000..1cea572 --- /dev/null +++ b/libs/eigen/Eigen/src/Core/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CORE_MODULE_H +#error "Please include Eigen/Core instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/Core/Inverse.h b/libs/eigen/Eigen/src/Core/Inverse.h index c514438..9c70733 100644 --- a/libs/eigen/Eigen/src/Core/Inverse.h +++ b/libs/eigen/Eigen/src/Core/Inverse.h @@ -10,6 +10,8 @@ #ifndef EIGEN_INVERSE_H #define EIGEN_INVERSE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template class InverseImpl; @@ -46,9 +48,9 @@ public: typedef typename XprType::StorageIndex StorageIndex; typedef typename XprType::Scalar Scalar; typedef typename internal::ref_selector::type XprTypeNested; - typedef typename internal::remove_all::type XprTypeNestedCleaned; + typedef internal::remove_all_t XprTypeNestedCleaned; typedef typename internal::ref_selector::type Nested; - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr) : m_xpr(xpr) @@ -102,7 +104,7 @@ struct unary_evaluator > unary_evaluator(const InverseType& inv_xpr) : m_result(inv_xpr.rows(), inv_xpr.cols()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); internal::call_assignment_no_alias(m_result, inv_xpr); } diff --git a/libs/eigen/Eigen/src/Core/Map.h b/libs/eigen/Eigen/src/Core/Map.h index 218cc15..56d1ff8 100644 --- a/libs/eigen/Eigen/src/Core/Map.h +++ b/libs/eigen/Eigen/src/Core/Map.h @@ -11,6 +11,8 @@ #ifndef EIGEN_MAP_H #define EIGEN_MAP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -129,7 +131,6 @@ template class Ma explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) { - PlainObjectType::Base::_check_template_params(); } /** Constructor in the dynamic-size vector case. @@ -142,7 +143,6 @@ template class Ma inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) { - PlainObjectType::Base::_check_template_params(); } /** Constructor in the dynamic-size matrix case. 
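For reference, the Map constructors kept here are the usual entry points for viewing raw memory without copying; a small self-contained example:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      float buf[6] = {1, 2, 3, 4, 5, 6};
      // Fixed-size, row-major 2x3 view of buf; writes go through to the buffer.
      Eigen::Map<Eigen::Matrix<float, 2, 3, Eigen::RowMajor>> m(buf);
      m(0, 1) = 20.f;                        // modifies buf[1]
      std::cout << m << "\n";
      // Dynamic-size variant using the (ptr, rows, cols) constructor above.
      Eigen::Map<Eigen::MatrixXf> d(buf, 2, 3);
      std::cout << d << "\n";
    }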
@@ -156,7 +156,6 @@ template class Ma inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) { - PlainObjectType::Base::_check_template_params(); } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) diff --git a/libs/eigen/Eigen/src/Core/MapBase.h b/libs/eigen/Eigen/src/Core/MapBase.h index d856447..bf8c163 100644 --- a/libs/eigen/Eigen/src/Core/MapBase.h +++ b/libs/eigen/Eigen/src/Core/MapBase.h @@ -15,6 +15,8 @@ EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup Core_Module @@ -51,11 +53,11 @@ template class MapBase typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; - typedef typename internal::conditional< - bool(internal::is_lvalue::value), - Scalar *, - const Scalar *>::type - PointerType; + typedef std::conditional_t< + bool(internal::is_lvalue::value), + Scalar *, + const Scalar *> + PointerType; using Base::derived; // using Base::RowsAtCompileTime; @@ -189,7 +191,7 @@ template class MapBase template EIGEN_DEVICE_FUNC - void checkSanity(typename internal::enable_if<(internal::traits::Alignment>0),void*>::type = 0) const + void checkSanity(std::enable_if_t<(internal::traits::Alignment>0),void*> = 0) const { #if EIGEN_MAX_ALIGN_BYTES>0 // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value: @@ -202,7 +204,7 @@ template class MapBase template EIGEN_DEVICE_FUNC - void checkSanity(typename internal::enable_if::Alignment==0,void*>::type = 0) const + void checkSanity(std::enable_if_t::Alignment==0,void*> = 0) const {} PointerType m_data; @@ -245,11 +247,11 @@ template class MapBase using Base::rowStride; using Base::colStride; - typedef typename internal::conditional< + typedef std::conditional_t< internal::is_lvalue::value, Scalar, const Scalar - >::type ScalarWithConstIfNotLvalue; + > ScalarWithConstIfNotLvalue; EIGEN_DEVICE_FUNC inline const Scalar* data() const { return this->m_data; } diff --git a/libs/eigen/Eigen/src/Core/MathFunctions.h b/libs/eigen/Eigen/src/Core/MathFunctions.h index 61b78f4..b194353 100644 --- a/libs/eigen/Eigen/src/Core/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/MathFunctions.h @@ -17,16 +17,9 @@ #define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L #define EIGEN_LN2 0.693147180559945309417232121458176568075500134360255254120680009493393621L -namespace Eigen { +#include "./InternalHeaderCheck.h" -// On WINCE, std::abs is defined for int only, so let's defined our own overloads: -// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. 
-#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 -long abs(long x) { return (labs(x)); } -double abs(double x) { return (fabs(x)); } -float abs(float x) { return (fabsf(x)); } -long double abs(long double x) { return (fabsl(x)); } -#endif +namespace Eigen { namespace internal { @@ -236,6 +229,63 @@ struct imag_ref_retval typedef typename NumTraits::Real & type; }; + +/**************************************************************************** +* Implementation of sign * +****************************************************************************/ +template::IsComplex!=0), + bool IsInteger = (NumTraits::IsInteger!=0)> +struct sign_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& a) + { + return Scalar( (a>Scalar(0)) - (a +struct sign_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& a) + { + return (std::isnan)(a) ? a : Scalar( (a>Scalar(0)) - (a +struct sign_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& a) + { + using real_type = typename NumTraits::Real; + real_type aa = std::abs(a); + if (aa==real_type(0)) + return Scalar(0); + aa = real_type(1)/aa; + return Scalar(a.real()*aa, a.imag()*aa ); + } +}; + +// The sign function for bool is the identity. +template<> +struct sign_impl +{ + EIGEN_DEVICE_FUNC + static inline bool run(const bool& a) + { + return a; + } +}; + +template +struct sign_retval +{ + typedef Scalar type; +}; + /**************************************************************************** * Implementation of conj * ****************************************************************************/ @@ -441,9 +491,9 @@ struct cast_impl // generating warnings on clang. Here we explicitly cast the real component. template struct cast_impl::IsComplex && NumTraits::IsComplex - >::type> + >> { EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) @@ -469,57 +519,16 @@ inline NewType cast(const OldType& x) template struct round_impl { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { - EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) -#if EIGEN_HAS_CXX11_MATH EIGEN_USING_STD(round); -#endif return Scalar(round(x)); } }; -#if !EIGEN_HAS_CXX11_MATH -#if EIGEN_HAS_C99_MATH -// Use ::roundf for float. -template<> -struct round_impl { - EIGEN_DEVICE_FUNC - static inline float run(const float& x) - { - return ::roundf(x); - } -}; -#else -template -struct round_using_floor_ceil_impl -{ - EIGEN_DEVICE_FUNC - static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - // Without C99 round/roundf, resort to floor/ceil. - EIGEN_USING_STD(floor); - EIGEN_USING_STD(ceil); - // If not enough precision to resolve a decimal at all, return the input. - // Otherwise, adding 0.5 can trigger an increment by 1. - const Scalar limit = Scalar(1ull << (NumTraits::digits() - 1)); - if (x >= limit || x <= -limit) { - return x; - } - return (x > Scalar(0)) ? 
Scalar(floor(x + Scalar(0.5))) : Scalar(ceil(x - Scalar(0.5))); - } -}; - -template<> -struct round_impl : round_using_floor_ceil_impl {}; - -template<> -struct round_impl : round_using_floor_ceil_impl {}; -#endif // EIGEN_HAS_C99_MATH -#endif // !EIGEN_HAS_CXX11_MATH - template struct round_retval { @@ -532,36 +541,16 @@ struct round_retval template struct rint_impl { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { - EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) -#if EIGEN_HAS_CXX11_MATH - EIGEN_USING_STD(rint); -#endif + EIGEN_USING_STD(rint); return rint(x); } }; -#if !EIGEN_HAS_CXX11_MATH -template<> -struct rint_impl { - EIGEN_DEVICE_FUNC - static inline double run(const double& x) - { - return ::rint(x); - } -}; -template<> -struct rint_impl { - EIGEN_DEVICE_FUNC - static inline float run(const float& x) - { - return ::rintf(x); - } -}; -#endif - template struct rint_retval { @@ -574,7 +563,7 @@ struct rint_retval // Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs. // This seems to be fixed in VS 2019. -#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920) +#if (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920) // std::arg is only defined for types of std::complex, or integer types or float/double/long double template::IsComplex || is_integral::value @@ -675,11 +664,7 @@ struct expm1_impl { EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if EIGEN_HAS_CXX11_MATH using std::expm1; - #else - using std_fallback::expm1; - #endif return expm1(x); } }; @@ -736,14 +721,11 @@ namespace std_fallback { template struct log1p_impl { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if EIGEN_HAS_CXX11_MATH using std::log1p; - #else - using std_fallback::log1p; - #endif return log1p(x); } }; @@ -751,9 +733,10 @@ struct log1p_impl { // Specialization for complex types that are not supported by std::log1p. template struct log1p_impl > { + EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar) + EIGEN_DEVICE_FUNC static inline std::complex run( const std::complex& x) { - EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar) return std_fallback::log1p(x); } }; @@ -893,7 +876,7 @@ struct random_default_impl // ScalarX is the widest of ScalarU and unsigned int. // We'll deal only with ScalarX and unsigned int below thus avoiding signed // types and arithmetic and signed overflows (which are undefined behavior). - typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX; + typedef std::conditional_t<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned> ScalarX; // The following difference doesn't overflow, provided our integer types are two's // complement and have the same number of padding bits in signed and unsigned variants. // This is the case in most modern implementations of C++. @@ -918,8 +901,8 @@ struct random_default_impl #else enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, scalar_bits = sizeof(Scalar) * CHAR_BIT, - shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), - offset = NumTraits::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 + shift = plain_enum_max(0, int(rand_bits) - int(scalar_bits)), + offset = NumTraits::IsSigned ? 
(1 << (plain_enum_min(rand_bits, scalar_bits)-1)) : 0 }; return Scalar((std::rand() >> shift) - offset); #endif @@ -956,7 +939,7 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() // Implementation of is* functions // std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. -#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#if (!(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC) || (EIGEN_COMP_CLANG) #define EIGEN_USE_STD_FPCLASSIFY 1 #else #define EIGEN_USE_STD_FPCLASSIFY 0 @@ -964,22 +947,22 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() template EIGEN_DEVICE_FUNC -typename internal::enable_if::value,bool>::type +std::enable_if_t::value,bool> isnan_impl(const T&) { return false; } template EIGEN_DEVICE_FUNC -typename internal::enable_if::value,bool>::type +std::enable_if_t::value,bool> isinf_impl(const T&) { return false; } template EIGEN_DEVICE_FUNC -typename internal::enable_if::value,bool>::type +std::enable_if_t::value,bool> isfinite_impl(const T&) { return true; } template EIGEN_DEVICE_FUNC -typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +std::enable_if_t<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool> isfinite_impl(const T& x) { #if defined(EIGEN_GPU_COMPILE_PHASE) @@ -994,7 +977,7 @@ isfinite_impl(const T& x) template EIGEN_DEVICE_FUNC -typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +std::enable_if_t<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool> isinf_impl(const T& x) { #if defined(EIGEN_GPU_COMPILE_PHASE) @@ -1009,7 +992,7 @@ isinf_impl(const T& x) template EIGEN_DEVICE_FUNC -typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +std::enable_if_t<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool> isnan_impl(const T& x) { #if defined(EIGEN_GPU_COMPILE_PHASE) @@ -1042,7 +1025,7 @@ EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_ms #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) -#if EIGEN_GNUC_AT_LEAST(5,0) +#if EIGEN_COMP_GNUC #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) #else // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), @@ -1234,7 +1217,7 @@ inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) template EIGEN_DEVICE_FUNC -inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x) +inline internal::add_const_on_value_type_t< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) > real_ref(const Scalar& x) { return internal::real_ref_impl::run(x); } @@ -1262,7 +1245,7 @@ inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) template EIGEN_DEVICE_FUNC -inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x) +inline internal::add_const_on_value_type_t< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) > imag_ref(const Scalar& x) { return internal::imag_ref_impl::run(x); } @@ -1281,6 +1264,13 @@ inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); } +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& x) +{ 
+ return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) @@ -1505,7 +1495,7 @@ double log(const double &x) { return ::log(x); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename internal::enable_if::IsSigned || NumTraits::IsComplex,typename NumTraits::Real>::type +std::enable_if_t::IsSigned || NumTraits::IsComplex,typename NumTraits::Real> abs(const T &x) { EIGEN_USING_STD(abs); return abs(x); @@ -1513,7 +1503,7 @@ abs(const T &x) { template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename internal::enable_if::IsSigned || NumTraits::IsComplex),typename NumTraits::Real>::type +std::enable_if_t::IsSigned || NumTraits::IsComplex),typename NumTraits::Real> abs(const T &x) { return x; } @@ -1541,6 +1531,37 @@ double abs(const std::complex& x) { } #endif +template ::IsInteger, bool IsSigned = NumTraits::IsSigned> +struct signbit_impl; +template +struct signbit_impl { + static constexpr size_t Size = sizeof(Scalar); + static constexpr size_t Shift = (CHAR_BIT * Size) - 1; + using intSize_t = typename get_integer_by_size::signed_type; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Scalar run(const Scalar& x) { + intSize_t a = bit_cast(x); + a = a >> Shift; + Scalar result = bit_cast(a); + return result; + } +}; +template +struct signbit_impl { + static constexpr size_t Size = sizeof(Scalar); + static constexpr size_t Shift = (CHAR_BIT * Size) - 1; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& x) { return x >> Shift; } +}; +template +struct signbit_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& ) { + return Scalar(0); + } +}; +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar signbit(const Scalar& x) { + return signbit_impl::run(x); +} + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp(const T &x) { @@ -1659,14 +1680,12 @@ T acos(const T &x) { return acos(x); } -#if EIGEN_HAS_CXX11_MATH template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T acosh(const T &x) { EIGEN_USING_STD(acosh); return static_cast(acosh(x)); } -#endif #if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos) @@ -1688,14 +1707,12 @@ T asin(const T &x) { return asin(x); } -#if EIGEN_HAS_CXX11_MATH template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T asinh(const T &x) { EIGEN_USING_STD(asinh); return static_cast(asinh(x)); } -#endif #if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin) @@ -1717,14 +1734,12 @@ T atan(const T &x) { return static_cast(atan(x)); } -#if EIGEN_HAS_CXX11_MATH template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atanh(const T &x) { EIGEN_USING_STD(atanh); return static_cast(atanh(x)); } -#endif #if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan) @@ -2006,9 +2021,10 @@ namespace internal { // Specialization for complex types that are not supported by std::expm1. 
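A quick check of the numext::sign semantics introduced above; the asserts follow the three implementation branches shown in the hunk (real, integer, complex), plus the bool identity:

    #include <Eigen/Core>
    #include <cassert>
    #include <cmath>
    #include <complex>

    int main() {
      using Eigen::numext::sign;
      assert(sign(-3.5) == -1.0);   // real branch: (a>0) - (a<0), NaN passes through
      assert(sign(0.0) == 0.0);
      assert(sign(42) == 1);        // integer branch
      assert(sign(true) == true);   // bool specialization is the identity
      // complex branch returns a / |a|, i.e. a unit-modulus value
      std::complex<double> z = sign(std::complex<double>(3.0, 4.0));
      assert(std::abs(z.real() - 0.6) < 1e-12 && std::abs(z.imag() - 0.8) < 1e-12);
    }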
 template<typename RealScalar>
 struct expm1_impl<std::complex<RealScalar> > {
+  EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
+
   EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
       const std::complex<RealScalar>& x) {
-    EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
     RealScalar xr = x.real();
     RealScalar xi = x.imag();
     // expm1(z) = exp(z) - 1
diff --git a/libs/eigen/Eigen/src/Core/MathFunctionsImpl.h b/libs/eigen/Eigen/src/Core/MathFunctionsImpl.h
index 4eaaaa7..642e5d6 100644
--- a/libs/eigen/Eigen/src/Core/MathFunctionsImpl.h
+++ b/libs/eigen/Eigen/src/Core/MathFunctionsImpl.h
@@ -11,17 +11,152 @@
 #ifndef EIGEN_MATHFUNCTIONSIMPL_H
 #define EIGEN_MATHFUNCTIONSIMPL_H
 
+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {
 
 namespace internal {
 
+/** \internal Fast reciprocal using Newton-Raphson's method.
+
+ Preconditions:
+   1. The starting guess provided in approx_a_recip must have at least half
+      the leading mantissa bits in the correct result, such that a single
+      Newton-Raphson step is sufficient to get within 1-2 ulps of the correct
+      result.
+   2. If a is zero, approx_a_recip must be infinite with the same sign as a.
+   3. If a is infinite, approx_a_recip must be zero with the same sign as a.
+
+ If the preconditions are satisfied, which they are for the _*_rcp_ps
+ instructions on x86, the result has a maximum relative error of 2 ulps,
+ and correctly handles reciprocals of zero, infinity, and NaN.
+*/
+template <typename Packet, int Steps>
+struct generic_reciprocal_newton_step {
+  static_assert(Steps > 0, "Steps must be at least 1.");
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
+  run(const Packet& a, const Packet& approx_a_recip) {
+    using Scalar = typename unpacket_traits<Packet>::type;
+    const Packet two = pset1<Packet>(Scalar(2));
+    // Refine the approximation using one Newton-Raphson step:
+    //   x_{i} = x_{i-1} * (2 - a * x_{i-1})
+    const Packet x =
+        generic_reciprocal_newton_step<Packet, Steps - 1>::run(a, approx_a_recip);
+    const Packet tmp = pnmadd(a, x, two);
+    // If tmp is NaN, it means that a is either +/-0 or +/-Inf.
+    // In this case return the approximation directly.
+    const Packet is_not_nan = pcmp_eq(tmp, tmp);
+    return pselect(is_not_nan, pmul(x, tmp), x);
+  }
+};
+
+template <typename Packet>
+struct generic_reciprocal_newton_step<Packet, 0> {
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
+  run(const Packet& /*unused*/, const Packet& approx_a_recip) {
+    return approx_a_recip;
+  }
+};
+
+
+/** \internal Fast reciprocal sqrt using Newton-Raphson's method.
+
+ Preconditions:
+   1. The starting guess provided in approx_rsqrt must have at least half
+      the leading mantissa bits in the correct result, such that a single
+      Newton-Raphson step is sufficient to get within 1-2 ulps of the correct
+      result.
+   2. If a is zero, approx_rsqrt must be infinite with the same sign as a.
+   3. If a is infinite, approx_rsqrt must be zero with the same sign as a.
+
+ If the preconditions are satisfied, which they are for the _*_rsqrt_ps
+ instructions on x86, the result has a maximum relative error of 2 ulps,
+ and correctly handles zero, infinity, and NaN. Positive denormals are
+ treated as zero.
+*/
+template <typename Packet, int Steps>
+struct generic_rsqrt_newton_step {
+  static_assert(Steps > 0, "Steps must be at least 1.");
+
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
+  run(const Packet& a, const Packet& approx_rsqrt) {
+    using Scalar = typename unpacket_traits<Packet>::type;
+    const Packet one_point_five = pset1<Packet>(Scalar(1.5));
+    const Packet minus_half = pset1<Packet>(Scalar(-0.5));
+
+    // Refine the approximation using one Newton-Raphson step:
+    //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
+    // The approximation is expressed this way to avoid over/under-flows.
+    Packet x_newton = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
+    for (int step = 1; step < Steps; ++step) {
+      x_newton = pmul(x_newton, pmadd(pmul(minus_half, x_newton), pmul(a, x_newton), one_point_five));
+    }
+
+    // If approx_rsqrt is 0 or +/-inf, we should return it as is. Note:
+    // on intel, approx_rsqrt can be inf for small denormal values.
+    const Packet return_approx = por(pcmp_eq(approx_rsqrt, pzero(a)),
+                                     pcmp_eq(pabs(approx_rsqrt), pset1<Packet>(NumTraits<Scalar>::infinity())));
+    return pselect(return_approx, approx_rsqrt, x_newton);
+  }
+};
+
+template <typename Packet>
+struct generic_rsqrt_newton_step<Packet, 0> {
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
+  run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
+    return approx_rsqrt;
+  }
+};
+
+
+/** \internal Fast sqrt using Newton-Raphson's method.
+
+ Preconditions:
+   1. The starting guess for the reciprocal sqrt provided in approx_rsqrt must
+      have at least half the leading mantissa bits in the correct result, such
+      that a single Newton-Raphson step is sufficient to get within 1-2 ulps of
+      the correct result.
+   2. If a is zero, approx_rsqrt must be infinite.
+   3. If a is infinite, approx_rsqrt must be zero.
+
+ If the preconditions are satisfied, which they are for the _*_rsqrt_ps
+ instructions on x86, the result has a maximum relative error of 2 ulps,
+ and correctly handles zero and infinity, and NaN. Positive denormal inputs
+ are treated as zero.
+*/
+template <typename Packet, int Steps>
+struct generic_sqrt_newton_step {
+  static_assert(Steps > 0, "Steps must be at least 1.");
+
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
+  run(const Packet& a, const Packet& approx_rsqrt) {
+    using Scalar = typename unpacket_traits<Packet>::type;
+    const Packet one_point_five = pset1<Packet>(Scalar(1.5));
+    const Packet minus_half = pset1<Packet>(Scalar(-0.5));
+    // If a is inf or zero, return a directly.
+    const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
+    const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
+    // Do a single step of Newton's iteration for reciprocal square root:
+    //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
+    // The Newton's step is computed this way to avoid over/under-flows.
+    Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
+    for (int step = 1; step < Steps; ++step) {
+      rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
+    }
+
+    // Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments.
+    // Return a itself for 0 or +inf, NaN for negative arguments.
+    return pselect(return_a, a, pmul(a, rsqrt));
+  }
+};
+
 /** \internal \returns the hyperbolic tan of \a a (coeff-wise)
     Doesn't do anything fancy, just a 13/6-degree rational interpolant which
     is accurate up to a couple of ulps in the (approximate) range [-8, 8],
     outside of which tanh(x) = +/-1 in single precision. The input is clamped
     to the range [-c, c]. The value c is chosen as the smallest value where
     the approximation evaluates to exactly 1. In the range [-0.0004, 0.0004]
-    the approxmation tanh(x) ~= x is used for better accuracy as x tends to zero.
+    the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
 
     This implementation works on both scalars and packets.
*/ @@ -88,7 +223,7 @@ RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) EIGEN_USING_STD(sqrt); RealScalar p, qp; p = numext::maxi(x,y); - if(p==RealScalar(0)) return RealScalar(0); + if(numext::is_exactly_zero(p)) return RealScalar(0); qp = numext::mini(y,x) / p; return p * sqrt(RealScalar(1) + qp*qp); } @@ -138,8 +273,8 @@ EIGEN_DEVICE_FUNC std::complex complex_sqrt(const std::complex& z) { return (numext::isinf)(y) ? std::complex(NumTraits::infinity(), y) - : x == zero ? std::complex(w, y < zero ? -w : w) - : x > zero ? std::complex(w, y / (2 * w)) + : numext::is_exactly_zero(x) ? std::complex(w, y < zero ? -w : w) + : x > zero ? std::complex(w, y / (2 * w)) : std::complex(numext::abs(y) / (2 * w), y < zero ? -w : w ); } @@ -177,10 +312,10 @@ EIGEN_DEVICE_FUNC std::complex complex_rsqrt(const std::complex& z) { const T woz = w / abs_z; // Corner cases consistent with 1/sqrt(z) on gcc/clang. return - abs_z == zero ? std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()) - : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex(zero, zero) - : x == zero ? std::complex(woz, y < zero ? woz : -woz) - : x > zero ? std::complex(woz, -y / (2 * w * abs_z)) + numext::is_exactly_zero(abs_z) ? std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()) + : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex(zero, zero) + : numext::is_exactly_zero(x) ? std::complex(woz, y < zero ? woz : -woz) + : x > zero ? std::complex(woz, -y / (2 * w * abs_z)) : std::complex(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz ); } diff --git a/libs/eigen/Eigen/src/Core/Matrix.h b/libs/eigen/Eigen/src/Core/Matrix.h index f0e59a9..c7747f1 100644 --- a/libs/eigen/Eigen/src/Core/Matrix.h +++ b/libs/eigen/Eigen/src/Core/Matrix.h @@ -11,37 +11,39 @@ #ifndef EIGEN_MATRIX_H #define EIGEN_MATRIX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template -struct traits > +template +struct traits > { private: - enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; - typedef typename find_best_packet<_Scalar,size>::type PacketScalar; + constexpr static int size = internal::size_at_compile_time(Rows_,Cols_); + typedef typename find_best_packet::type PacketScalar; enum { - row_major_bit = _Options&RowMajor ? RowMajorBit : 0, - is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, - max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, - default_alignment = compute_default_alignment<_Scalar,max_size>::value, - actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + row_major_bit = Options_&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = MaxRows_==Dynamic || MaxCols_==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : MaxRows_*MaxCols_, + default_alignment = compute_default_alignment::value, + actual_alignment = ((Options_&DontAlign)==0) ? default_alignment : 0, required_alignment = unpacket_traits::alignment, - packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0 + packet_access_bit = (packet_traits::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? 
PacketAccessBit : 0 }; public: - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Dense StorageKind; typedef Eigen::Index StorageIndex; typedef MatrixXpr XprKind; enum { - RowsAtCompileTime = _Rows, - ColsAtCompileTime = _Cols, - MaxRowsAtCompileTime = _MaxRows, - MaxColsAtCompileTime = _MaxCols, - Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - Options = _Options, + RowsAtCompileTime = Rows_, + ColsAtCompileTime = Cols_, + MaxRowsAtCompileTime = MaxRows_, + MaxColsAtCompileTime = MaxCols_, + Flags = compute_matrix_flags(Options_), + Options = Options_, InnerStrideAtCompileTime = 1, OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, @@ -63,18 +65,18 @@ public: * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). * * The first three template parameters are required: - * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex. + * \tparam Scalar_ Numeric type, e.g. float, double, int or std::complex. * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). - * \tparam _Rows Number of rows, or \b Dynamic - * \tparam _Cols Number of columns, or \b Dynamic + * \tparam Rows_ Number of rows, or \b Dynamic + * \tparam Cols_ Number of columns, or \b Dynamic * * The remaining template parameters are optional -- in most cases you don't have to worry about them. - * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either * \b #AutoAlign or \b #DontAlign. * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. - * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note"). - * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note"). + * \tparam MaxRows_ Maximum number of rows. Defaults to \a Rows_ (\ref maxrows "note"). + * \tparam MaxCols_ Maximum number of columns. Defaults to \a Cols_ (\ref maxrows "note"). * * Eigen provides a number of typedefs covering the usual cases. Here are some examples: * @@ -128,12 +130,12 @@ public: * Note that \em dense matrices, be they Fixed-size or Dynamic-size, do not expand dynamically in the sense of a std::map. * If you want this behavior, see the Sparse module. * - *
\anchor maxrows _MaxRows and _MaxCols:
+ *
\anchor maxrows MaxRows_ and MaxCols_:
*
In most cases, one just leaves these parameters to the default values. * These parameters specify the maximum number of rows and columns that the matrix may have. They are useful in cases * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they cannot - * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols - * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.
+ * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case MaxRows_ and MaxCols_ + * are the dimensions of the original matrix, while Rows_ and Cols_ are Dynamic. * * * ABI and storage layout @@ -174,9 +176,9 @@ public: * \ref TopicStorageOrders */ -template +template class Matrix - : public PlainObjectBase > + : public PlainObjectBase > { public: @@ -185,7 +187,7 @@ class Matrix */ typedef PlainObjectBase Base; - enum { Options = _Options }; + enum { Options = Options_ }; EIGEN_DENSE_PUBLIC_INTERFACE(Matrix) @@ -258,7 +260,6 @@ class Matrix EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix() : Base() { - Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } @@ -266,24 +267,18 @@ class Matrix EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) - { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } + { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } -#if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) - : Base(std::move(other)) - { - Base::_check_template_params(); - } + : Base(std::move(other)) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { Base::operator=(std::move(other)); return *this; } -#endif -#if EIGEN_HAS_CXX11 /** \copydoc PlainObjectBase(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&... args) * * Example: \include Matrix_variadic_ctor_cxx11.cpp @@ -317,9 +312,9 @@ class Matrix * * \sa Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args) */ - EIGEN_DEVICE_FUNC - explicit EIGEN_STRONG_INLINE Matrix(const std::initializer_list>& list) : Base(list) {} -#endif // end EIGEN_HAS_CXX11 + EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE Matrix( + const std::initializer_list>& list) + : Base(list) {} #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -328,7 +323,6 @@ class Matrix EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(const T& x) { - Base::_check_template_params(); Base::template _init1(x); } @@ -336,7 +330,6 @@ class Matrix EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) { - Base::_check_template_params(); Base::template _init2(x, y); } @@ -388,7 +381,6 @@ class Matrix EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) { - Base::_check_template_params(); EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3) m_storage.data()[0] = x; m_storage.data()[1] = y; @@ -400,7 +392,6 @@ class Matrix EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) { - Base::_check_template_params(); EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4) m_storage.data()[0] = x; m_storage.data()[1] = y; @@ -480,16 +471,21 @@ class Matrix #define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ /** \ingroup matrixtypedefs */ \ +/** \brief `Size`×`Size` matrix of type `Type`. */ \ typedef Matrix Matrix##SizeSuffix##TypeSuffix; \ /** \ingroup matrixtypedefs */ \ +/** \brief `Size`×`1` vector of type `Type`. */ \ typedef Matrix Vector##SizeSuffix##TypeSuffix; \ /** \ingroup matrixtypedefs */ \ +/** \brief `1`×`Size` vector of type `Type`. 
*/ \ typedef Matrix RowVector##SizeSuffix##TypeSuffix; #define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ /** \ingroup matrixtypedefs */ \ +/** \brief `Size`×`Dynamic` matrix of type `Type`. */ \ typedef Matrix Matrix##Size##X##TypeSuffix; \ /** \ingroup matrixtypedefs */ \ +/** \brief `Dynamic`×`Size` matrix of type `Type`. */ \ typedef Matrix Matrix##X##Size##TypeSuffix; #define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ @@ -511,30 +507,28 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cd) #undef EIGEN_MAKE_TYPEDEFS #undef EIGEN_MAKE_FIXED_TYPEDEFS -#if EIGEN_HAS_CXX11 - -#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix) \ -/** \ingroup matrixtypedefs */ \ -/** \brief \cpp11 */ \ -template \ -using Matrix##SizeSuffix = Matrix; \ -/** \ingroup matrixtypedefs */ \ -/** \brief \cpp11 */ \ -template \ -using Vector##SizeSuffix = Matrix; \ -/** \ingroup matrixtypedefs */ \ -/** \brief \cpp11 */ \ -template \ +#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix) \ +/** \ingroup matrixtypedefs */ \ +/** \brief \cpp11 `Size`×`Size` matrix of type `Type`.*/ \ +template \ +using Matrix##SizeSuffix = Matrix; \ +/** \ingroup matrixtypedefs */ \ +/** \brief \cpp11 `Size`×`1` vector of type `Type`.*/ \ +template \ +using Vector##SizeSuffix = Matrix; \ +/** \ingroup matrixtypedefs */ \ +/** \brief \cpp11 `1`×`Size` vector of type `Type`.*/ \ +template \ using RowVector##SizeSuffix = Matrix; -#define EIGEN_MAKE_FIXED_TYPEDEFS(Size) \ -/** \ingroup matrixtypedefs */ \ -/** \brief \cpp11 */ \ -template \ -using Matrix##Size##X = Matrix; \ -/** \ingroup matrixtypedefs */ \ -/** \brief \cpp11 */ \ -template \ +#define EIGEN_MAKE_FIXED_TYPEDEFS(Size) \ +/** \ingroup matrixtypedefs */ \ +/** \brief \cpp11 `Size`×`Dynamic` matrix of type `Type` */ \ +template \ +using Matrix##Size##X = Matrix; \ +/** \ingroup matrixtypedefs */ \ +/** \brief \cpp11 `Dynamic`×`Size` matrix of type `Type`. */ \ +template \ using Matrix##X##Size = Matrix; EIGEN_MAKE_TYPEDEFS(2, 2) @@ -546,20 +540,18 @@ EIGEN_MAKE_FIXED_TYPEDEFS(3) EIGEN_MAKE_FIXED_TYPEDEFS(4) /** \ingroup matrixtypedefs - * \brief \cpp11 */ + * \brief \cpp11 `Size`×`1` vector of type `Type`. */ template using Vector = Matrix; /** \ingroup matrixtypedefs - * \brief \cpp11 */ + * \brief \cpp11 `1`×`Size` vector of type `Type`. */ template using RowVector = Matrix; #undef EIGEN_MAKE_TYPEDEFS #undef EIGEN_MAKE_FIXED_TYPEDEFS -#endif // EIGEN_HAS_CXX11 - } // end namespace Eigen #endif // EIGEN_MATRIX_H diff --git a/libs/eigen/Eigen/src/Core/MatrixBase.h b/libs/eigen/Eigen/src/Core/MatrixBase.h index 45c3a59..ea2178f 100644 --- a/libs/eigen/Eigen/src/Core/MatrixBase.h +++ b/libs/eigen/Eigen/src/Core/MatrixBase.h @@ -11,6 +11,8 @@ #ifndef EIGEN_MATRIXBASE_H #define EIGEN_MATRIXBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class MatrixBase @@ -92,8 +94,8 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN /** type of the equivalent square matrix */ - typedef Matrix SquareMatrixType; + typedef Matrix SquareMatrixType; #endif // not EIGEN_PARSED_BY_DOXYGEN /** \returns the size of the main diagonal, which is min(rows(),cols()). 
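For orientation, a minimal sketch of what a few of the typedefs and C++11 alias templates generated above expand to in user code. The type names are Eigen's public API; the program itself is illustrative only:

#include <Eigen/Dense>

int main() {
  // Fixed-size typedefs generated by EIGEN_MAKE_TYPEDEFS:
  Eigen::Matrix4f m = Eigen::Matrix4f::Identity();  // Matrix<float, 4, 4>
  Eigen::Vector3d v(1.0, 2.0, 3.0);                 // Matrix<double, 3, 1>
  Eigen::RowVector2i r(1, 2);                       // Matrix<int, 1, 2>
  // Half-dynamic typedefs generated by EIGEN_MAKE_FIXED_TYPEDEFS:
  Eigen::Matrix3Xf m3x(3, 10);                      // 3 rows, run-time column count
  // C++11 alias templates defined in the hunk above:
  Eigen::Vector<float, 5> v5 = Eigen::Vector<float, 5>::Zero();
  Eigen::Matrix4<double> m4 = Eigen::Matrix4<double>::Identity();
  // MaxRows_/MaxCols_ in action: run-time size with a compile-time bound,
  // so the storage can stay on the stack (no heap allocation).
  Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, 0, 4, 4> bounded(2, 3);
  return 0;
}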
@@ -107,10 +109,10 @@ template class MatrixBase /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal the return type of MatrixBase::adjoint() */ - typedef typename internal::conditional::IsComplex, - CwiseUnaryOp, ConstTransposeReturnType>, - ConstTransposeReturnType - >::type AdjointReturnType; + typedef std::conditional_t::IsComplex, + CwiseUnaryOp, ConstTransposeReturnType>, + ConstTransposeReturnType + > AdjointReturnType; /** \internal Return type of eigenvalues() */ typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; /** \internal the return type of identity */ @@ -184,6 +186,11 @@ template class MatrixBase const Product operator*(const DiagonalBase &diagonal) const; + template + EIGEN_DEVICE_FUNC + const Product + operator*(const SkewSymmetricBase &skew) const; + template EIGEN_DEVICE_FUNC typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType @@ -206,28 +213,22 @@ template class MatrixBase EIGEN_DEVICE_FUNC DiagonalReturnType diagonal(); - typedef typename internal::add_const >::type ConstDiagonalReturnType; + typedef Diagonal ConstDiagonalReturnType; EIGEN_DEVICE_FUNC - ConstDiagonalReturnType diagonal() const; - - template struct DiagonalIndexReturnType { typedef Diagonal Type; }; - template struct ConstDiagonalIndexReturnType { typedef const Diagonal Type; }; + const ConstDiagonalReturnType diagonal() const; template EIGEN_DEVICE_FUNC - typename DiagonalIndexReturnType::Type diagonal(); + Diagonal diagonal(); template EIGEN_DEVICE_FUNC - typename ConstDiagonalIndexReturnType::Type diagonal() const; - - typedef Diagonal DiagonalDynamicIndexReturnType; - typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + const Diagonal diagonal() const; EIGEN_DEVICE_FUNC - DiagonalDynamicIndexReturnType diagonal(Index index); + Diagonal diagonal(Index index); EIGEN_DEVICE_FUNC - ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; + const Diagonal diagonal(Index index) const; template struct TriangularViewReturnType { typedef TriangularView Type; }; template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; @@ -263,6 +264,8 @@ template class MatrixBase EIGEN_DEVICE_FUNC const DiagonalWrapper asDiagonal() const; const PermutationWrapper asPermutation() const; + EIGEN_DEVICE_FUNC + const SkewSymmetricWrapper asSkewSymmetric() const; EIGEN_DEVICE_FUNC Derived& setIdentity(); @@ -277,6 +280,8 @@ template class MatrixBase bool isUpperTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; bool isLowerTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isSkewSymmetric(const RealScalar& prec = NumTraits::dummy_precision()) const; + template bool isOrthogonal(const MatrixBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; @@ -368,25 +373,23 @@ template class MatrixBase /////////// SVD module /////////// - inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; - inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; + template + inline JacobiSVD jacobiSvd() const; + template + EIGEN_DEPRECATED + inline JacobiSVD jacobiSvd(unsigned int computationOptions) const; + + template + inline BDCSVD bdcSvd() const; + template + EIGEN_DEPRECATED + inline BDCSVD bdcSvd(unsigned int computationOptions) const; /////////// Geometry module /////////// - #ifndef EIGEN_PARSED_BY_DOXYGEN - /// 
\internal helper struct to form the return type of the cross product - template struct cross_product_return_type { - typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; - typedef Matrix type; - }; - #endif // EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN - inline typename cross_product_return_type::type -#else - inline PlainObject -#endif + inline typename internal::cross_impl::return_type cross(const MatrixBase& other) const; template @@ -468,11 +471,9 @@ template class MatrixBase const MatrixFunctionReturnValue matrixFunction(StemFunction f) const; EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine) EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine) -#if EIGEN_HAS_CXX11_MATH EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, atanh, inverse hyperbolic tangent) EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, acosh, inverse hyperbolic cosine) EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, asinh, inverse hyperbolic sine) -#endif EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine) EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine) EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root) diff --git a/libs/eigen/Eigen/src/Core/NestByValue.h b/libs/eigen/Eigen/src/Core/NestByValue.h index b427576..311cb5a 100644 --- a/libs/eigen/Eigen/src/Core/NestByValue.h +++ b/libs/eigen/Eigen/src/Core/NestByValue.h @@ -11,6 +11,8 @@ #ifndef EIGEN_NESTBYVALUE_H #define EIGEN_NESTBYVALUE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -41,6 +43,8 @@ template class NestByValue public: typedef typename internal::dense_xpr_base::type Base; + static constexpr bool HasDirectAccess = internal::has_direct_access::ret; + EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} @@ -52,6 +56,18 @@ template class NestByValue EIGEN_DEVICE_FUNC const ExpressionType& nestedExpression() const { return m_expression; } + EIGEN_DEVICE_FUNC typename std::enable_if::type data() const { + return m_expression.data(); + } + + EIGEN_DEVICE_FUNC typename std::enable_if::type innerStride() const { + return m_expression.innerStride(); + } + + EIGEN_DEVICE_FUNC typename std::enable_if::type outerStride() const { + return m_expression.outerStride(); + } + protected: const ExpressionType m_expression; }; diff --git a/libs/eigen/Eigen/src/Core/NoAlias.h b/libs/eigen/Eigen/src/Core/NoAlias.h index 570283d..09c0aac 100644 --- a/libs/eigen/Eigen/src/Core/NoAlias.h +++ b/libs/eigen/Eigen/src/Core/NoAlias.h @@ -10,6 +10,8 @@ #ifndef EIGEN_NOALIAS_H #define EIGEN_NOALIAS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class NoAlias diff --git a/libs/eigen/Eigen/src/Core/NumTraits.h b/libs/eigen/Eigen/src/Core/NumTraits.h index 72eac5a..53362ef 100644 --- a/libs/eigen/Eigen/src/Core/NumTraits.h +++ b/libs/eigen/Eigen/src/Core/NumTraits.h @@ -10,6 +10,8 @@ #ifndef EIGEN_NUMTRAITS_H #define EIGEN_NUMTRAITS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -61,10 +63,10 @@ struct default_digits_impl // Floating point { EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { - using std::log; + using std::log2; using std::ceil; typedef typename NumTraits::Real Real; - return int(ceil(-log(NumTraits::epsilon())/log(static_cast(2)))); + return int(ceil(-log2(NumTraits::epsilon()))); } }; @@ -83,17 +85,17 @@ namespace numext { //
TODO: Replace by std::bit_cast (available in C++20) template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { -#if EIGEN_HAS_TYPE_TRAITS // The behaviour of memcpy is not specified for non-trivially copyable types EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED); EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value && std::is_default_constructible::value, THIS_TYPE_IS_NOT_SUPPORTED); -#endif - EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED); + Tgt tgt; + // Load src into registers first. This allows the memcpy to be elided by CUDA. + const Src staged = src; EIGEN_USING_STD(memcpy) - memcpy(&tgt, &src, sizeof(Tgt)); + memcpy(static_cast(&tgt),static_cast(&staged), sizeof(Tgt)); return tgt; } } // namespace numext @@ -162,11 +164,7 @@ template struct GenericNumTraits }; typedef T Real; - typedef typename internal::conditional< - IsInteger, - typename internal::conditional::type, - T - >::type NonInteger; + typedef std::conditional_t, T> NonInteger; typedef T Nested; typedef T Literal; @@ -252,15 +250,15 @@ template<> struct NumTraits static inline long double dummy_precision() { return 1e-15l; } }; -template struct NumTraits > - : GenericNumTraits > +template struct NumTraits > + : GenericNumTraits > { - typedef _Real Real; - typedef typename NumTraits<_Real>::Literal Literal; + typedef Real_ Real; + typedef typename NumTraits::Literal Literal; enum { IsComplex = 1, - RequireInitialization = NumTraits<_Real>::RequireInitialization, - ReadCost = 2 * NumTraits<_Real>::ReadCost, + RequireInitialization = NumTraits::RequireInitialization, + ReadCost = 2 * NumTraits::ReadCost, AddCost = 2 * NumTraits::AddCost, MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost }; diff --git a/libs/eigen/Eigen/src/Core/PartialReduxEvaluator.h b/libs/eigen/Eigen/src/Core/PartialReduxEvaluator.h index 29abf35..693fc35 100644 --- a/libs/eigen/Eigen/src/Core/PartialReduxEvaluator.h +++ b/libs/eigen/Eigen/src/Core/PartialReduxEvaluator.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PARTIALREDUX_H #define EIGEN_PARTIALREDUX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -29,7 +31,7 @@ namespace internal { * some (optional) processing of the outcome, e.g., division by n for mean. * * For the vectorized path let's observe that the packet-size and outer-unrolling -* are both decided by the assignement logic. So all we have to do is to decide +* are both decided by the assignment logic. So all we have to do is to decide * on the inner unrolling. 
* * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h, @@ -54,12 +56,17 @@ struct packetwise_redux_traits /* Value to be returned when size==0 , by default let's return 0 */ template EIGEN_DEVICE_FUNC -PacketType packetwise_redux_empty_value(const Func& ) { return pset1(0); } +PacketType packetwise_redux_empty_value(const Func& ) { + const typename unpacket_traits::type zero(0); + return pset1(zero); +} /* For products the default is 1 */ template EIGEN_DEVICE_FUNC -PacketType packetwise_redux_empty_value(const scalar_product_op& ) { return pset1(1); } +PacketType packetwise_redux_empty_value(const scalar_product_op& ) { + return pset1(Scalar(1)); +} /* Perform the actual reduction */ template > { typedef PartialReduxExpr XprType; typedef typename internal::nested_eval::type ArgTypeNested; - typedef typename internal::add_const_on_value_type::type ConstArgTypeNested; - typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef add_const_on_value_type_t ConstArgTypeNested; + typedef internal::remove_all_t ArgTypeNestedCleaned; typedef typename ArgType::Scalar InputScalar; typedef typename XprType::Scalar Scalar; enum { @@ -147,16 +154,16 @@ struct evaluator > : TraversalSize==0 ? 1 : int(TraversalSize) * int(evaluator::CoeffReadCost) + int(CostOpType::value), - _ArgFlags = evaluator::Flags, + ArgFlags_ = evaluator::Flags, - _Vectorizable = bool(int(_ArgFlags)&PacketAccessBit) + Vectorizable_ = bool(int(ArgFlags_)&PacketAccessBit) && bool(MemberOp::Vectorizable) - && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0) + && (Direction==int(Vertical) ? bool(ArgFlags_&RowMajorBit) : (ArgFlags_&RowMajorBit)==0) && (TraversalSize!=0), Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))) - | (_Vectorizable ? PacketAccessBit : 0) + | (Vectorizable_ ? 
PacketAccessBit : 0) | LinearAccessBit, Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized diff --git a/libs/eigen/Eigen/src/Core/PermutationMatrix.h b/libs/eigen/Eigen/src/Core/PermutationMatrix.h index 69401bf..73a7300 100644 --- a/libs/eigen/Eigen/src/Core/PermutationMatrix.h +++ b/libs/eigen/Eigen/src/Core/PermutationMatrix.h @@ -11,6 +11,8 @@ #ifndef EIGEN_PERMUTATIONMATRIX_H #define EIGEN_PERMUTATIONMATRIX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -269,13 +271,13 @@ class PermutationBase : public EigenBase }; namespace internal { -template -struct traits > - : traits > +template +struct traits > + : traits > { typedef PermutationStorage StorageKind; - typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; - typedef _StorageIndex StorageIndex; + typedef Matrix IndicesType; + typedef StorageIndex_ StorageIndex; typedef void Scalar; }; } @@ -287,14 +289,14 @@ struct traits -class PermutationMatrix : public PermutationBase > +template +class PermutationMatrix : public PermutationBase > { typedef PermutationBase Base; typedef internal::traits Traits; @@ -389,20 +391,20 @@ class PermutationMatrix : public PermutationBase -struct traits,_PacketAccess> > - : traits > +template +struct traits,PacketAccess_> > + : traits > { typedef PermutationStorage StorageKind; - typedef Map, _PacketAccess> IndicesType; - typedef _StorageIndex StorageIndex; + typedef Map, PacketAccess_> IndicesType; + typedef StorageIndex_ StorageIndex; typedef void Scalar; }; } -template -class Map,_PacketAccess> - : public PermutationBase,_PacketAccess> > +template +class Map,PacketAccess_> + : public PermutationBase,PacketAccess_> > { typedef PermutationBase Base; typedef internal::traits Traits; @@ -452,18 +454,18 @@ class Map class TranspositionsWrapper; +template class TranspositionsWrapper; namespace internal { -template -struct traits > +template +struct traits > { typedef PermutationStorage StorageKind; typedef void Scalar; - typedef typename _IndicesType::Scalar StorageIndex; - typedef _IndicesType IndicesType; + typedef typename IndicesType_::Scalar StorageIndex; + typedef IndicesType_ IndicesType; enum { - RowsAtCompileTime = _IndicesType::SizeAtCompileTime, - ColsAtCompileTime = _IndicesType::SizeAtCompileTime, + RowsAtCompileTime = IndicesType_::SizeAtCompileTime, + ColsAtCompileTime = IndicesType_::SizeAtCompileTime, MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, Flags = 0 @@ -476,14 +478,14 @@ struct traits > * * \brief Class to view a vector of integers as a permutation matrix * - * \tparam _IndicesType the type of the vector of integer (can be any compatible expression) + * \tparam IndicesType_ the type of the vector of integers (can be any compatible expression) * * This class allows viewing any vector expression of integers as a permutation matrix.
* * \sa class PermutationBase, class PermutationMatrix */ -template -class PermutationWrapper : public PermutationBase > +template +class PermutationWrapper : public PermutationBase > { typedef PermutationBase Base; typedef internal::traits Traits; @@ -498,7 +500,7 @@ class PermutationWrapper : public PermutationBase::type& + const internal::remove_all_t& indices() const { return m_indices; } protected: diff --git a/libs/eigen/Eigen/src/Core/PlainObjectBase.h b/libs/eigen/Eigen/src/Core/PlainObjectBase.h index e2ddbd1..60a75b1 100644 --- a/libs/eigen/Eigen/src/Core/PlainObjectBase.h +++ b/libs/eigen/Eigen/src/Core/PlainObjectBase.h @@ -22,23 +22,20 @@ # define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED #endif +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { template struct check_rows_cols_for_overflow { - template - EIGEN_DEVICE_FUNC - static EIGEN_ALWAYS_INLINE void run(Index, Index) - { - } + template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index) {} }; template<> struct check_rows_cols_for_overflow { - template - EIGEN_DEVICE_FUNC - static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols) - { + template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) { // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 // we assume Index is signed Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; @@ -64,18 +61,18 @@ namespace doxygen { // This is a workaround to doxygen not being able to understand the inheritance logic // when it is hidden by the dense_xpr_base helper struct. // Moreover, doxygen fails to include members that are not documented in the declaration body of -// MatrixBase if we inherits MatrixBase >, +// MatrixBase if we inherit MatrixBase >, // this is why we simply inherit MatrixBase, though this does not make sense. /** This class is just a workaround for Doxygen and it does not actually exist. */ template struct dense_xpr_base_dispatcher; /** This class is just a workaround for Doxygen and it does not actually exist.
*/ -template -struct dense_xpr_base_dispatcher > +template +struct dense_xpr_base_dispatcher > : public MatrixBase {}; /** This class is just a workaround for Doxygen and it does not actually exist. */ -template -struct dense_xpr_base_dispatcher > +template +struct dense_xpr_base_dispatcher > : public ArrayBase {}; } // namespace doxygen @@ -134,6 +131,16 @@ class PlainObjectBase : public internal::dense_xpr_base::type enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment>0) }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + EIGEN_STATIC_ASSERT(internal::check_implication(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (int(Options)&RowMajor)==RowMajor), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT(internal::check_implication(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (int(Options)&RowMajor)==0), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT(((Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_DEVICE_FUNC Base& base() { return *static_cast(this); } EIGEN_DEVICE_FUNC @@ -148,12 +155,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeff(Index) const for details. */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const - { - if(Flags & RowMajorBit) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index rowId, Index colId) const { + if (Flags & RowMajorBit) return m_storage.data()[colId + rowId * m_storage.cols()]; - else // column-major + else // column-major return m_storage.data()[rowId + colId * m_storage.rows()]; } @@ -171,12 +176,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeffRef(Index,Index) const for details. */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) - { - if(Flags & RowMajorBit) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index rowId, Index colId) { + if (Flags & RowMajorBit) return m_storage.data()[colId + rowId * m_storage.cols()]; - else // column-major + else // column-major return m_storage.data()[rowId + colId * m_storage.rows()]; } @@ -184,28 +187,20 @@ class PlainObjectBase : public internal::dense_xpr_base::type * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeffRef(Index) const for details. */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return m_storage.data()[index]; - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; } /** This is the const version of coeffRef(Index,Index) which is thus a synonym of coeff(Index,Index).
* It is provided for convenience. */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const - { - if(Flags & RowMajorBit) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index rowId, Index colId) const { + if (Flags & RowMajorBit) return m_storage.data()[colId + rowId * m_storage.cols()]; - else // column-major + else // column-major return m_storage.data()[rowId + colId * m_storage.rows()]; } /** This is the const version of coeffRef(Index) which is thus a synonym of coeff(Index). * It is provided for convenience. */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const - { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index index) const { return m_storage.data()[index]; } @@ -267,13 +262,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void resize(Index rows, Index cols) - { - eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime) - && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index rows, Index cols) { + eigen_assert(internal::check_implication(RowsAtCompileTime!=Dynamic, rows==RowsAtCompileTime) + && internal::check_implication(ColsAtCompileTime!=Dynamic, cols==ColsAtCompileTime) + && internal::check_implication(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic, rows<=MaxRowsAtCompileTime) + && internal::check_implication(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic, cols<=MaxColsAtCompileTime) && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array."); internal::check_rows_cols_for_overflow::run(rows, cols); #ifdef EIGEN_INITIALIZE_COEFFS @@ -297,12 +290,13 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) */ - EIGEN_DEVICE_FUNC - inline void resize(Index size) - { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) - eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0); - #ifdef EIGEN_INITIALIZE_COEFFS + EIGEN_DEVICE_FUNC inline constexpr void resize(Index size) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) + eigen_assert( + ((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime == Dynamic || size <= MaxSizeAtCompileTime)) || + SizeAtCompileTime == size) && + size >= 0); +#ifdef EIGEN_INITIALIZE_COEFFS bool size_changed = size != this->size(); #endif if(RowsAtCompileTime == 1) @@ -322,11 +316,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - EIGEN_DEVICE_FUNC - inline void resize(NoChange_t, Index cols) - { - resize(rows(), cols); - } + EIGEN_DEVICE_FUNC inline constexpr void resize(NoChange_t, Index cols) { resize(rows(), cols); } /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange * as in the example below.
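Since the \include example file referenced above is not part of this diff, here is a minimal usage sketch of the NoChange resize overloads (public Eigen API; the sizes are illustrative):

#include <Eigen/Dense>
#include <cassert>

int main() {
  Eigen::MatrixXd m(2, 3);
  m.resize(Eigen::NoChange, 5);  // still 2 rows, now 5 columns
  assert(m.rows() == 2 && m.cols() == 5);
  m.resize(4, Eigen::NoChange);  // still 5 columns, now 4 rows
  assert(m.rows() == 4 && m.cols() == 5);
  // Note: resize() does not preserve coefficients when the total size
  // changes; use conservativeResize() to keep existing values.
  m.conservativeResize(4, 6);
  return 0;
}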
@@ -336,11 +326,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - EIGEN_DEVICE_FUNC - inline void resize(Index rows, NoChange_t) - { - resize(rows, cols()); - } + EIGEN_DEVICE_FUNC inline constexpr void resize(Index rows, NoChange_t) { resize(rows, cols()); } /** Resizes \c *this to have the same dimensions as \a other. * Takes care of doing all the checking that's needed. @@ -475,7 +461,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() { -// _check_template_params(); // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } @@ -486,11 +471,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) : m_storage(internal::constructor_without_unaligned_array_assert()) { -// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } #endif -#if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT : m_storage( std::move(other.m_storage) ) @@ -500,11 +484,9 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_DEVICE_FUNC PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT { - _check_template_params(); m_storage = std::move(other.m_storage); return *this; } -#endif /** Copy constructor */ EIGEN_DEVICE_FUNC @@ -514,17 +496,14 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) : m_storage(size, rows, cols) { -// _check_template_params(); // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } - #if EIGEN_HAS_CXX11 - /** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. \cpp11 + /** \brief Construct a row or column vector with fixed size from an arbitrary number of coefficients. * * \only_for_vectors * * This constructor is for 1D arrays or vectors with more than 4 coefficients. - * There exists C++98 analogue constructors for fixed-size array/vector having 1, 2, 3, or 4 coefficients. * * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this * constructor must match the fixed number of rows (resp. columns) of \c *this. @@ -534,7 +513,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&...
args) : m_storage() { - _check_template_params(); EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, sizeof...(args) + 4); m_storage.data()[0] = a0; m_storage.data()[1] = a1; @@ -546,14 +524,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type } /** \brief Constructs a Matrix or Array and initializes it by elements given by an initializer list of initializer - * lists \cpp11 + * lists */ - EIGEN_DEVICE_FUNC - explicit EIGEN_STRONG_INLINE PlainObjectBase(const std::initializer_list>& list) - : m_storage() - { - _check_template_params(); - + EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE PlainObjectBase( + const std::initializer_list>& list) + : m_storage() { size_t list_size = 0; if (list.begin() != list.end()) { list_size = list.begin()->size(); @@ -581,7 +556,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type } } } - #endif // end EIGEN_HAS_CXX11 /** \sa PlainObjectBase::operator=(const EigenBase&) */ template @@ -589,7 +563,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase &other) : m_storage() { - _check_template_params(); resizeLike(other); _set_noalias(other); } @@ -600,7 +573,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) : m_storage() { - _check_template_params(); resizeLike(other); *this = other.derived(); } @@ -609,7 +581,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue& other) { - _check_template_params(); // FIXME this does not automatically transpose vectors if necessary resize(other.rows(), other.cols()); other.evalTo(this->derived()); @@ -640,7 +611,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \see class Map */ - //@{ + ///@{ static inline ConstMapType Map(const Scalar* data) { return ConstMapType(data); } static inline MapType Map(Scalar* data) @@ -704,7 +675,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type template static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride& stride) { return typename StridedAlignedMapType >::type(data, rows, cols, stride); } - //@} + ///@} using Base::setConstant; EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val); @@ -800,7 +771,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if::type* = 0) + EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, std::enable_if_t* = 0) { const bool t0_is_integer_alike = internal::is_valid_index_type::value; const bool t1_is_integer_alike = internal::is_valid_index_type::value; @@ -812,7 +783,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if::type* = 0) + EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, std::enable_if_t* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) m_storage.data()[0] = Scalar(val0); @@ -822,10 +793,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1, - typename internal::enable_if< (!internal::is_same::value) - && (internal::is_same::value) - && (internal::is_same::value) - && 
Base::SizeAtCompileTime==2,T1>::type* = 0) + std::enable_if_t< (!internal::is_same::value) + && (internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==2,T1>* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) m_storage.data()[0] = Scalar(val0); @@ -836,8 +807,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type // then the argument is meant to be the size of the object. template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) - && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) + EIGEN_STRONG_INLINE void _init1(Index size, std::enable_if_t< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) + && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>* = 0) { // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. const bool is_integer_alike = internal::is_valid_index_type::value; @@ -850,7 +821,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitly converted) template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, std::enable_if_t::value,T>* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) m_storage.data()[0] = val0; @@ -860,10 +831,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Index& val0, - typename internal::enable_if< (!internal::is_same::value) - && (internal::is_same::value) - && Base::SizeAtCompileTime==1 - && internal::is_convertible::value,T*>::type* = 0) + std::enable_if_t< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==1 + && internal::is_convertible::value,T*>* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) m_storage.data()[0] = Scalar(val0); @@ -916,10 +887,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar& val0, - typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic - && Base::SizeAtCompileTime!=1 - && internal::is_convertible::value - && internal::is_same::XprKind,ArrayXpr>::value,T>::type* = 0) + std::enable_if_t< Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T>* = 0) { Base::setConstant(val0); } @@ -928,12 +899,12 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Index& val0, - typename internal::enable_if< (!internal::is_same::value) - && (internal::is_same::value) - && Base::SizeAtCompileTime!=Dynamic - && Base::SizeAtCompileTime!=1 - && internal::is_convertible::value - && internal::is_same::XprKind,ArrayXpr>::value,T*>::type* = 0) + std::enable_if_t< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T*>* = 0) { Base::setConstant(val0); } @@ -964,21 +935,6 @@ 
class PlainObjectBase : public internal::dense_xpr_base::type void swap(DenseBase const & other) { Base::swap(other.derived()); } - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void _check_template_params() - { - EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (int(Options)&RowMajor)==RowMajor) - && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (int(Options)&RowMajor)==0) - && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0)) - && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0)) - && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0)) - && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0)) - && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic) - && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic) - && (Options & (DontAlign|RowMajor)) == Options), - INVALID_MATRIX_TEMPLATE_PARAMETERS) - } - enum { IsPlainObjectBase = 1 }; #endif public: @@ -999,11 +955,7 @@ namespace internal { template struct conservative_resize_like_impl { - #if EIGEN_HAS_TYPE_TRAITS - static const bool IsRelocatable = std::is_trivially_copyable::value; - #else - static const bool IsRelocatable = !NumTraits::RequireInitialization; - #endif + static constexpr bool IsRelocatable = std::is_trivially_copyable::value; static void run(DenseBase& _this, Index rows, Index cols) { if (_this.rows() == rows && _this.cols() == cols) return; diff --git a/libs/eigen/Eigen/src/Core/Product.h b/libs/eigen/Eigen/src/Core/Product.h index 70a6c10..85842d1 100644 --- a/libs/eigen/Eigen/src/Core/Product.h +++ b/libs/eigen/Eigen/src/Core/Product.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PRODUCT_H #define EIGEN_PRODUCT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template class ProductImpl; @@ -19,8 +21,8 @@ namespace internal { template struct traits > { - typedef typename remove_all::type LhsCleaned; - typedef typename remove_all::type RhsCleaned; + typedef remove_all_t LhsCleaned; + typedef remove_all_t RhsCleaned; typedef traits LhsTraits; typedef traits RhsTraits; @@ -40,7 +42,7 @@ struct traits > MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, // FIXME: only needed by GeneralMatrixMatrixTriangular - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + InnerSize = min_size_prefer_fixed(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit @@ -58,8 +60,8 @@ struct traits > * * \brief Expression of the product of two arbitrary matrices or vectors * - * \tparam _Lhs the type of the left-hand side expression - * \tparam _Rhs the type of the right-hand side expression + * \tparam Lhs_ the type of the left-hand side expression + * \tparam Rhs_ the type of the right-hand side expression * * This class represents an expression of the product of two arbitrary matrices. 
* @@ -67,16 +69,16 @@ struct traits > * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct * */ -template -class Product : public ProductImpl<_Lhs,_Rhs,Option, - typename internal::product_promote_storage_type::StorageKind, - typename internal::traits<_Rhs>::StorageKind, - internal::product_type<_Lhs,_Rhs>::ret>::ret> +template +class Product : public ProductImpl::StorageKind, + typename internal::traits::StorageKind, + internal::product_type::ret>::ret> { public: - typedef _Lhs Lhs; - typedef _Rhs Rhs; + typedef Lhs_ Lhs; + typedef Rhs_ Rhs; typedef typename ProductImpl< Lhs, Rhs, Option, @@ -87,8 +89,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, typedef typename internal::ref_selector::type LhsNested; typedef typename internal::ref_selector::type RhsNested; - typedef typename internal::remove_all::type LhsNestedCleaned; - typedef typename internal::remove_all::type RhsNestedCleaned; + typedef internal::remove_all_t LhsNestedCleaned; + typedef internal::remove_all_t RhsNestedCleaned; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) diff --git a/libs/eigen/Eigen/src/Core/ProductEvaluators.h b/libs/eigen/Eigen/src/Core/ProductEvaluators.h index 8cf294b..9da2406 100644 --- a/libs/eigen/Eigen/src/Core/ProductEvaluators.h +++ b/libs/eigen/Eigen/src/Core/ProductEvaluators.h @@ -13,6 +13,8 @@ #ifndef EIGEN_PRODUCTEVALUATORS_H #define EIGEN_PRODUCTEVALUATORS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -107,14 +109,14 @@ struct product_evaluator, ProductTag, LhsShape, RhsSh explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); // FIXME shall we handle nested_eval here?, // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) 
// typedef typename internal::nested_eval::type LhsNested; // typedef typename internal::nested_eval::type RhsNested; -// typedef typename internal::remove_all::type LhsNestedCleaned; -// typedef typename internal::remove_all::type RhsNestedCleaned; +// typedef internal::remove_all_t LhsNestedCleaned; +// typedef internal::remove_all_t RhsNestedCleaned; // // const LhsNested lhs(xpr.lhs()); // const RhsNested rhs(xpr.rhs()); @@ -134,7 +136,7 @@ protected: // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> struct Assignment, internal::assign_op, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> + std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>> { typedef Product SrcXprType; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -152,7 +154,7 @@ struct Assignment, internal::assign_op struct Assignment, internal::add_assign_op, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> + std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>> { typedef Product SrcXprType; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -167,7 +169,7 @@ struct Assignment, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> struct Assignment, internal::sub_assign_op, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> + std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>> { typedef Product SrcXprType; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -296,7 +298,7 @@ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, cons template struct generic_product_impl { - template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + template struct is_row_major : std::conditional_t<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type> {}; typedef typename Product::Scalar Scalar; // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose @@ -370,7 +372,7 @@ struct generic_product_impl typedef typename nested_eval::type RhsNested; typedef typename Product::Scalar Scalar; enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; - typedef typename internal::remove_all::type>::type MatrixType; + typedef internal::remove_all_t> MatrixType; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) @@ -427,8 +429,8 @@ struct generic_product_impl // 3 - it makes this fallback consistent with the heavy GEMM routine. // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices. // (see https://stackoverflow.com/questions/54738495) - // For small fixed sizes matrices, howver, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 slower, - // and the behavior depends also a lot on the compiler... This is why this re-writting strategy is currently + // For small fixed sizes matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 slower, + // and the behavior depends also a lot on the compiler... This is why this re-writing strategy is currently // enabled only when falling back from the main GEMM. 
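For context, a minimal sketch contrasting the two product paths this fallback mediates between (public Eigen API; the matrices and sizes are illustrative only):

#include <Eigen/Dense>

int main() {
  Eigen::Matrix3f a = Eigen::Matrix3f::Random();
  Eigen::Matrix3f b = Eigen::Matrix3f::Random();
  Eigen::Matrix3f c;
  // Default product: may dispatch to the heavy GEMM kernel.
  c.noalias() = a * b;
  // Coefficient-based product: evaluated lazily, with no temporary; this is
  // essentially what the rewrite described above falls back to.
  c.noalias() = a.lazyProduct(b);
  return 0;
}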
template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -448,7 +450,7 @@ struct generic_product_impl blas_traits::extract(rhs).template conjugateIf(), func, actualAlpha, - typename conditional::type()); + std::conditional_t()); } protected: @@ -458,7 +460,7 @@ protected: void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type) { EIGEN_UNUSED_VARIABLE(s); - eigen_internal_assert(s==Scalar(1)); + eigen_internal_assert(numext::is_exactly_one(s)); call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func); } @@ -526,8 +528,8 @@ struct product_evaluator, ProductTag, DenseShape, typedef typename internal::nested_eval::type LhsNested; typedef typename internal::nested_eval::type RhsNested; - typedef typename internal::remove_all::type LhsNestedCleaned; - typedef typename internal::remove_all::type RhsNestedCleaned; + typedef internal::remove_all_t LhsNestedCleaned; + typedef internal::remove_all_t RhsNestedCleaned; typedef evaluator LhsEtorType; typedef evaluator RhsEtorType; @@ -535,7 +537,7 @@ struct product_evaluator, ProductTag, DenseShape, enum { RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime }; @@ -564,8 +566,8 @@ struct product_evaluator, ProductTag, DenseShape, RhsVecPacketSize = unpacket_traits::size, // Here, we don't care about alignment larger than the usable packet size. - LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), - RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), + LhsAlignment = plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), + RhsAlignment = plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), SameType = is_same::value, @@ -585,8 +587,8 @@ struct product_evaluator, ProductTag, DenseShape, LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), - Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) - : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment))!=0 ? 0 : LhsAlignment) + : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment))!=0 ? 0 : RhsAlignment) : 0, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. 
It is not used outside @@ -640,8 +642,8 @@ struct product_evaluator, ProductTag, DenseShape, } protected: - typename internal::add_const_on_value_type::type m_lhs; - typename internal::add_const_on_value_type::type m_rhs; + add_const_on_value_type_t m_lhs; + add_const_on_value_type_t m_rhs; LhsEtorType m_lhsImpl; RhsEtorType m_rhsImpl; @@ -836,22 +838,22 @@ public: MatrixFlags = evaluator::Flags, DiagFlags = evaluator::Flags, - _StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor + StorageOrder_ = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor : MatrixFlags & RowMajorBit ? RowMajor : ColMajor, - _SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor), + SameStorageOrder_ = StorageOrder_ == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor), - _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) - ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), - _SameTypes = is_same::value, + ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)), + SameTypes_ = is_same::value, // FIXME currently we need same types, but in the future the next rule should be the one - //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) - && _SameTypes - && (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit) - && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), - _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + //Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ && bool(int(DiagFlags)&PacketAccessBit))), + Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) + && SameTypes_ + && (SameStorageOrder_ || (MatrixFlags&LinearAccessBit)==LinearAccessBit) + && (ScalarAccessOnDiag_ || (bool(int(DiagFlags)&PacketAccessBit))), + LinearAccessMask_ = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0), Alignment = evaluator::Alignment, AsScalarProduct = (DiagonalType::SizeAtCompileTime==1) @@ -887,7 +889,7 @@ protected: { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! + DiagonalPacketLoadMode = plain_enum_min(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! 
}; return internal::pmul(m_matImpl.template packet(row, col), m_diagImpl.template packet(id)); @@ -913,7 +915,7 @@ struct product_evaluator, ProductTag, DiagonalSha typedef typename Lhs::DiagonalVectorType DiagonalType; - enum { StorageOrder = Base::_StorageOrder }; + enum { StorageOrder = Base::StorageOrder_ }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) @@ -932,7 +934,7 @@ struct product_evaluator, ProductTag, DiagonalSha // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. // See also similar calls below. return this->template packet_impl(row,col, row, - typename internal::conditional::type()); + std::conditional_t()); } template @@ -957,7 +959,7 @@ struct product_evaluator, ProductTag, DenseShape, typedef Product XprType; typedef typename XprType::PlainObject PlainObject; - enum { StorageOrder = Base::_StorageOrder }; + enum { StorageOrder = Base::StorageOrder_ }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) @@ -974,7 +976,7 @@ struct product_evaluator, ProductTag, DenseShape, EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return this->template packet_impl(row,col, col, - typename internal::conditional::type()); + std::conditional_t()); } template @@ -1001,7 +1003,7 @@ template struct permutation_matrix_product { typedef typename nested_eval::type MatrixType; - typedef typename remove_all::type MatrixTypeCleaned; + typedef remove_all_t MatrixTypeCleaned; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) @@ -1109,7 +1111,7 @@ template::type MatrixType; - typedef typename remove_all::type MatrixTypeCleaned; + typedef remove_all_t MatrixTypeCleaned; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) @@ -1172,6 +1174,40 @@ struct generic_product_impl, MatrixShape, TranspositionsShap } }; +/*************************************************************************** +* skew symmetric products +* for now we just call the generic implementation +***************************************************************************/ +template +struct generic_product_impl +{ + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + generic_product_impl::evalTo(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + generic_product_impl::evalTo(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + generic_product_impl::evalTo(dst, lhs, rhs); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/Random.h b/libs/eigen/Eigen/src/Core/Random.h index dab2ac8..fab6889 100644 --- a/libs/eigen/Eigen/src/Core/Random.h +++ b/libs/eigen/Eigen/src/Core/Random.h @@ -10,12 +10,13 @@ #ifndef EIGEN_RANDOM_H #define EIGEN_RANDOM_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { template struct scalar_random_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) inline const Scalar operator() () const { return random(); } }; diff --git a/libs/eigen/Eigen/src/Core/Redux.h 
b/libs/eigen/Eigen/src/Core/Redux.h index b6790d1..796e6c4 100644 --- a/libs/eigen/Eigen/src/Core/Redux.h +++ b/libs/eigen/Eigen/src/Core/Redux.h @@ -11,6 +11,8 @@ #ifndef EIGEN_REDUX_H #define EIGEN_REDUX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -198,8 +200,7 @@ struct redux_impl Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr) { eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix"); - Scalar res; - res = eval.coeffByOuterInner(0, 0); + Scalar res = eval.coeffByOuterInner(0, 0); for(Index i = 1; i < xpr.innerSize(); ++i) res = func(res, eval.coeffByOuterInner(0, i)); for(Index i = 1; i < xpr.outerSize(); ++i) @@ -238,7 +239,7 @@ struct redux_impl const int packetAlignment = unpacket_traits::alignment; enum { alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned), - alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment) + alignment = plain_enum_max(alignment0, Evaluator::Alignment) }; const Index alignedStart = internal::first_default_aligned(xpr); const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); @@ -353,12 +354,12 @@ struct redux_impl }; // evaluator adaptor -template -class redux_evaluator : public internal::evaluator<_XprType> +template +class redux_evaluator : public internal::evaluator { - typedef internal::evaluator<_XprType> Base; + typedef internal::evaluator Base; public: - typedef _XprType XprType; + typedef XprType_ XprType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit redux_evaluator(const XprType &xpr) : Base(xpr) {} diff --git a/libs/eigen/Eigen/src/Core/Ref.h b/libs/eigen/Eigen/src/Core/Ref.h index c2a37ea..81de5f9 100644 --- a/libs/eigen/Eigen/src/Core/Ref.h +++ b/libs/eigen/Eigen/src/Core/Ref.h @@ -10,20 +10,22 @@ #ifndef EIGEN_REF_H #define EIGEN_REF_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template -struct traits > - : public traits > +template +struct traits > + : public traits > { - typedef _PlainObjectType PlainObjectType; - typedef _StrideType StrideType; + typedef PlainObjectType_ PlainObjectType; + typedef StrideType_ StrideType; enum { - Options = _Options, - Flags = traits >::Flags | NestByRefBit, - Alignment = traits >::Alignment + Options = Options_, + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment }; template struct match { @@ -46,7 +48,7 @@ struct traits > ScalarTypeMatch = internal::is_same::value, MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; - typedef typename internal::conditional::type type; + typedef std::conditional_t type; }; }; @@ -197,8 +199,8 @@ protected: return false; } - ::new (static_cast(this)) Base(expr.data(), rows, cols); - ::new (&m_stride) StrideBase( + internal::construct_at(this, expr.data(), rows, cols); + internal::construct_at(&m_stride, (StrideType::OuterStrideAtCompileTime == 0) ? 0 : outer_stride, (StrideType::InnerStrideAtCompileTime == 0) ? 
0 : inner_stride ); return true; @@ -285,7 +287,7 @@ template class Ref typedef internal::traits Traits; template EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); + std::enable_if_t::MatchAtCompileTime),Derived>* = 0); public: typedef RefBase Base; @@ -295,17 +297,17 @@ template class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + std::enable_if_t::MatchAtCompileTime),Derived>* = 0) { EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); - // Construction must pass since we will not create temprary storage in the non-const case. + // Construction must pass since we will not create temporary storage in the non-const case. const bool success = Base::construct(expr.derived()); EIGEN_UNUSED_VARIABLE(success) eigen_assert(success); } template EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + std::enable_if_t::MatchAtCompileTime),Derived>* = 0) #else /** Implicit constructor from any dense expression */ template @@ -337,7 +339,7 @@ template class Ref< template EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, - typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) + std::enable_if_t::ScalarTypeMatch),Derived>* = 0) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; diff --git a/libs/eigen/Eigen/src/Core/Replicate.h b/libs/eigen/Eigen/src/Core/Replicate.h index ab5be7e..4f91bbe 100644 --- a/libs/eigen/Eigen/src/Core/Replicate.h +++ b/libs/eigen/Eigen/src/Core/Replicate.h @@ -10,6 +10,8 @@ #ifndef EIGEN_REPLICATE_H #define EIGEN_REPLICATE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -21,7 +23,7 @@ struct traits > typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; typedef typename ref_selector::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; enum { RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic ? 
Dynamic @@ -62,19 +64,19 @@ template class Replicate : public internal::dense_xpr_base< Replicate >::type { typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; - typedef typename internal::traits::_MatrixTypeNested _MatrixTypeNested; + typedef typename internal::traits::MatrixTypeNested_ MatrixTypeNested_; public: typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; template EIGEN_DEVICE_FUNC inline explicit Replicate(const OriginalMatrixType& matrix) : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { - EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + EIGEN_STATIC_ASSERT((internal::is_same,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic); } @@ -84,7 +86,7 @@ template class Replicate inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { - EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + EIGEN_STATIC_ASSERT((internal::is_same,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) } @@ -94,7 +96,7 @@ template class Replicate inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } EIGEN_DEVICE_FUNC - const _MatrixTypeNested& nestedExpression() const + const MatrixTypeNested_& nestedExpression() const { return m_matrix; } diff --git a/libs/eigen/Eigen/src/Core/Reshaped.h b/libs/eigen/Eigen/src/Core/Reshaped.h index 52de73b..81355ac 100644 --- a/libs/eigen/Eigen/src/Core/Reshaped.h +++ b/libs/eigen/Eigen/src/Core/Reshaped.h @@ -11,6 +11,8 @@ #ifndef EIGEN_RESHAPED_H #define EIGEN_RESHAPED_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class Reshaped @@ -27,10 +29,9 @@ namespace Eigen { * It is the return type of DenseBase::reshaped(NRowsType,NColsType) and * most of the time this is the only way it is used. * - * However, in C++98, if you want to directly maniputate reshaped expressions, - * for instance if you want to write a function returning such an expression, you - * will need to use this class. In C++11, it is advised to use the \em auto - * keyword for such use cases. + * If you want to directly manipulate reshaped expressions, + * for instance if you want to write a function returning such an expression, + * it is advised to use the \em auto keyword for such use cases. 
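A minimal sketch of the auto-based pattern recommended above (the sizes and values are illustrative, not taken from the Eigen docs):

```cpp
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd m(2, 6);
  m << 1, 2, 3,  4,  5,  6,
       7, 8, 9, 10, 11, 12;
  // reshaped() returns a Reshaped<...> expression viewing m's coefficients;
  // auto keeps that expression type without spelling it out and without copying.
  auto r = m.reshaped(3, 4);   // 3x4 view, column-major coefficient order
  double first = r(0, 0);      // same storage as m(0, 0)
  return first == m(0, 0) ? 0 : 1;
}
```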
* * Here is an example illustrating the dynamic case: * \include class_Reshaped.cpp @@ -156,7 +157,7 @@ class ReshapedImpl_dense EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl_dense) typedef typename internal::ref_selector::non_const_type MatrixTypeNested; - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; class InnerIterator; @@ -186,12 +187,12 @@ class ReshapedImpl_dense /** \returns the nested expression */ EIGEN_DEVICE_FUNC - const typename internal::remove_all::type& + const internal::remove_all_t& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ EIGEN_DEVICE_FUNC - typename internal::remove_reference::type& + std::remove_reference_t& nestedExpression() { return m_xpr; } protected: @@ -231,7 +232,7 @@ class ReshapedImpl_dense {} EIGEN_DEVICE_FUNC - const typename internal::remove_all::type& nestedExpression() const + const internal::remove_all_t& nestedExpression() const { return m_xpr; } @@ -250,7 +251,7 @@ class ReshapedImpl_dense EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const { - return ((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows(); + return (((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows()) * m_xpr.innerStride(); } protected: @@ -324,7 +325,7 @@ struct reshaped_evaluator RowCol; - inline RowCol index_remap(Index rowId, Index colId) const + EIGEN_DEVICE_FUNC inline RowCol index_remap(Index rowId, Index colId) const { if(Order==ColMajor) { @@ -443,7 +444,7 @@ struct reshaped_evaluator(xpr) { // TODO: for the 3.4 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime - eigen_assert(((internal::UIntPtr(xpr.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + eigen_assert(((internal::UIntPtr(xpr.data()) % plain_enum_max(1, evaluator::Alignment)) == 0) && "data is not aligned"); } }; diff --git a/libs/eigen/Eigen/src/Core/ReturnByValue.h b/libs/eigen/Eigen/src/Core/ReturnByValue.h index 4dad13e..9025282 100644 --- a/libs/eigen/Eigen/src/Core/ReturnByValue.h +++ b/libs/eigen/Eigen/src/Core/ReturnByValue.h @@ -11,6 +11,8 @@ #ifndef EIGEN_RETURNBYVALUE_H #define EIGEN_RETURNBYVALUE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -104,7 +106,7 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); xpr.evalTo(m_result); } diff --git a/libs/eigen/Eigen/src/Core/Reverse.h b/libs/eigen/Eigen/src/Core/Reverse.h index 28cdd76..97e1d68 100644 --- a/libs/eigen/Eigen/src/Core/Reverse.h +++ b/libs/eigen/Eigen/src/Core/Reverse.h @@ -12,6 +12,8 @@ #ifndef EIGEN_REVERSE_H #define EIGEN_REVERSE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -24,13 +26,13 @@ struct traits > typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; typedef typename ref_selector::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) + Flags = MatrixTypeNested_::Flags & (RowMajorBit | 
LvalueBit) }; }; @@ -67,7 +69,7 @@ template class Reverse typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; using Base::IsRowMajor; protected: @@ -99,7 +101,7 @@ template class Reverse return -m_matrix.innerStride(); } - EIGEN_DEVICE_FUNC const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { return m_matrix; @@ -173,10 +175,10 @@ struct vectorwise_reverse_inplace_impl template static void run(ExpressionType &xpr) { - const int HalfAtCompileTime = ExpressionType::RowsAtCompileTime==Dynamic?Dynamic:ExpressionType::RowsAtCompileTime/2; + constexpr Index HalfAtCompileTime = ExpressionType::RowsAtCompileTime==Dynamic?Dynamic:ExpressionType::RowsAtCompileTime/2; Index half = xpr.rows()/2; - xpr.topRows(fix(half)) - .swap(xpr.bottomRows(fix(half)).colwise().reverse()); + xpr.template topRows(half) + .swap(xpr.template bottomRows(half).colwise().reverse()); } }; @@ -186,10 +188,10 @@ struct vectorwise_reverse_inplace_impl template static void run(ExpressionType &xpr) { - const int HalfAtCompileTime = ExpressionType::ColsAtCompileTime==Dynamic?Dynamic:ExpressionType::ColsAtCompileTime/2; + constexpr Index HalfAtCompileTime = ExpressionType::ColsAtCompileTime==Dynamic?Dynamic:ExpressionType::ColsAtCompileTime/2; Index half = xpr.cols()/2; - xpr.leftCols(fix(half)) - .swap(xpr.rightCols(fix(half)).rowwise().reverse()); + xpr.template leftCols(half) + .swap(xpr.template rightCols(half).rowwise().reverse()); } }; diff --git a/libs/eigen/Eigen/src/Core/Select.h b/libs/eigen/Eigen/src/Core/Select.h index 7c86bf8..d9ed2b2 100644 --- a/libs/eigen/Eigen/src/Core/Select.h +++ b/libs/eigen/Eigen/src/Core/Select.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SELECT_H #define EIGEN_SELECT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class Select @@ -17,9 +19,9 @@ namespace Eigen { * * \brief Expression of a coefficient wise version of the C++ ternary operator ?: * - * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix - * \param ThenMatrixType the type of the \em then expression - * \param ElseMatrixType the type of the \em else expression + * \tparam ConditionMatrixType the type of the \em condition expression which must be a boolean matrix + * \tparam ThenMatrixType the type of the \em then expression + * \tparam ElseMatrixType the type of the \em else expression * * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:. * It is the return type of DenseBase::select() and most of the time this is the only way it is used. 
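A concrete use of DenseBase::select(), mirroring the coefficient-wise ?: semantics just described (the values are illustrative):

```cpp
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXi m(3, 3);
  m << 1, 2, 3,
       4, 5, 6,
       7, 8, 9;
  // Coefficient-wise ternary: where the condition holds, take -m's entry, else keep m's.
  m = (m.array() >= 5).select(-m, m);  // negates every coefficient >= 5
}
```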
diff --git a/libs/eigen/Eigen/src/Core/SelfAdjointView.h b/libs/eigen/Eigen/src/Core/SelfAdjointView.h index 8ce3b37..7a930db 100644 --- a/libs/eigen/Eigen/src/Core/SelfAdjointView.h +++ b/libs/eigen/Eigen/src/Core/SelfAdjointView.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SELFADJOINTMATRIX_H #define EIGEN_SELFADJOINTMATRIX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class SelfAdjointView @@ -18,8 +20,8 @@ namespace Eigen { * * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix * - * \param MatrixType the type of the dense matrix storing the coefficients - * \param TriangularPart can be either \c #Lower or \c #Upper + * \tparam MatrixType the type of the dense matrix storing the coefficients + * \tparam TriangularPart can be either \c #Lower or \c #Upper * * This class is an expression of a sefladjoint matrix from a triangular part of a matrix * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView() @@ -33,7 +35,7 @@ template struct traits > : traits { typedef typename ref_selector::non_const_type MatrixTypeNested; - typedef typename remove_all::type MatrixTypeNestedCleaned; + typedef remove_all_t MatrixTypeNestedCleaned; typedef MatrixType ExpressionType; typedef typename MatrixType::PlainObject FullMatrixType; enum { @@ -46,12 +48,13 @@ struct traits > : traits } -template class SelfAdjointView - : public TriangularBase > +template class SelfAdjointView + : public TriangularBase > { public: + EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY) - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef TriangularBase Base; typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits::MatrixTypeNestedCleaned MatrixTypeNestedCleaned; @@ -60,8 +63,8 @@ template class SelfAdjointView /** \brief The type of coefficients in this matrix */ typedef typename internal::traits::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; - typedef typename internal::remove_all::type MatrixConjugateReturnType; - typedef SelfAdjointView::type, UpLo> ConstSelfAdjointView; + typedef internal::remove_all_t MatrixConjugateReturnType; + typedef SelfAdjointView, UpLo> ConstSelfAdjointView; enum { Mode = internal::traits::Mode, @@ -71,10 +74,7 @@ template class SelfAdjointView typedef typename MatrixType::PlainObject PlainObject; EIGEN_DEVICE_FUNC - explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) - { - EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY); - } + explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) { } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); } @@ -180,16 +180,16 @@ template class SelfAdjointView */ template EIGEN_DEVICE_FUNC - typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), - TriangularView, - TriangularView >::type + std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), + TriangularView, + TriangularView > triangularView() const { - typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix); - typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1); - return typename 
internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), - TriangularView, - TriangularView >::type(tmp2); + std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType> tmp1(m_matrix); + std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType> tmp2(tmp1); + return std::conditional_t<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), + TriangularView, + TriangularView >(tmp2); } typedef SelfAdjointView ConjugateReturnType; @@ -203,10 +203,10 @@ template class SelfAdjointView */ template EIGEN_DEVICE_FUNC - inline typename internal::conditional::type + inline std::conditional_t conjugateIf() const { - typedef typename internal::conditional::type ReturnType; + typedef std::conditional_t ReturnType; return ReturnType(m_matrix.template conjugateIf()); } @@ -218,10 +218,10 @@ template class SelfAdjointView typedef SelfAdjointView TransposeReturnType; /** \sa MatrixBase::transpose() */ + template EIGEN_DEVICE_FUNC - inline TransposeReturnType transpose() + inline TransposeReturnType transpose(std::enable_if_t::value, Dummy*> = nullptr) { - EIGEN_STATIC_ASSERT_LVALUE(MatrixType) typename MatrixType::TransposeReturnType tmp(m_matrix); return TransposeReturnType(tmp); } diff --git a/libs/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h b/libs/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h index 7c89c2e..14dbec0 100644 --- a/libs/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/libs/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SELFCWISEBINARYOP_H #define EIGEN_SELFCWISEBINARYOP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { // TODO generalize the scalar type of 'other' diff --git a/libs/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h b/libs/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h new file mode 100644 index 0000000..7f6b5fd --- /dev/null +++ b/libs/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h @@ -0,0 +1,412 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SKEWSYMMETRICMATRIX3_H +#define EIGEN_SKEWSYMMETRICMATRIX3_H + +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class SkewSymmetricBase + * \ingroup Core_Module + * + * \brief Base class for skew symmetric matrices and expressions + * + * This is the base class that is inherited by SkewSymmetricMatrix3 and related expression + * types, which internally use a three-vector for storing the entries. SkewSymmetric + * types always represent square three-by-three matrices. + * + * This implementation follows class DiagonalMatrix. + * + * \tparam Derived is the derived type, a SkewSymmetricMatrix3 or SkewSymmetricWrapper.
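The defining property these new types encode is the cross-product identity [v]x u = v x u. A quick sketch against the API added by this patch (it only compiles once this new header is part of the build):

```cpp
#include <Eigen/Dense>

int main() {
  Eigen::Vector3d v(1.0, 2.0, 3.0);
  Eigen::Vector3d u(0.5, -1.0, 2.0);
  // asSkewSymmetric() (declared later in this file) views v as the matrix [v]x;
  // multiplying by [v]x performs the same linear map as crossing with v.
  Eigen::Matrix3d vx = v.asSkewSymmetric().toDenseMatrix();
  Eigen::Vector3d w1 = vx * u;
  Eigen::Vector3d w2 = v.cross(u);  // w1 and w2 agree up to rounding
  return w1.isApprox(w2) ? 0 : 1;
}
```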
+ * + * \sa class SkewSymmetricMatrix3, class SkewSymmetricWrapper + */ +template +class SkewSymmetricBase : public EigenBase +{ + public: + typedef typename internal::traits::SkewSymmetricVectorType SkewSymmetricVectorType; + typedef typename SkewSymmetricVectorType::Scalar Scalar; + typedef typename SkewSymmetricVectorType::RealScalar RealScalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + enum { + RowsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + ColsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + IsVectorAtCompileTime = 0, + Flags = NoPreferredStorageOrderBit + }; + + typedef Matrix DenseMatrixType; + typedef DenseMatrixType DenseType; + typedef SkewSymmetricMatrix3 PlainObject; + + /** \returns a const reference to the derived object. */ + EIGEN_DEVICE_FUNC + inline const Derived& derived() const { return *static_cast(this); } + /** \returns a reference to the derived object. */ + EIGEN_DEVICE_FUNC + inline Derived& derived() { return *static_cast(this); } + + /** + * Constructs a dense matrix from \c *this. Note that this directly returns a dense matrix type, + * not an expression. + * \returns A dense matrix, with its entries set from the derived object. */ + EIGEN_DEVICE_FUNC + DenseMatrixType toDenseMatrix() const { return derived(); } + + /** The determinant vanishes. */ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + inline Scalar determinant() const { return 0; } + + /** A.transpose() = -A */ + EIGEN_DEVICE_FUNC + PlainObject transpose() const { return (-vector()).asSkewSymmetric(); } + + /** \returns the exponential of this matrix using Rodrigues’ formula */ + EIGEN_DEVICE_FUNC + DenseMatrixType exponential() const { + DenseMatrixType retVal = DenseMatrixType::Identity(); + const SkewSymmetricVectorType& v = vector(); + if (v.isZero()) { + return retVal; + } + const Scalar norm2 = v.squaredNorm(); + const Scalar norm = numext::sqrt(norm2); + retVal += ((((1 - numext::cos(norm))/norm2)*derived())*derived()) + (numext::sin(norm)/norm)*derived().toDenseMatrix(); + return retVal; + } + + /** \returns a const reference to the derived object's vector of coefficients. */ + EIGEN_DEVICE_FUNC + inline const SkewSymmetricVectorType& vector() const { return derived().vector(); } + /** \returns a reference to the derived object's vector of coefficients. */ + EIGEN_DEVICE_FUNC + inline SkewSymmetricVectorType& vector() { return derived().vector(); } + + /** \returns the number of rows. */ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + inline Index rows() const { return 3; } + /** \returns the number of columns.
*/ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + inline Index cols() const { return 3; } + + /** \returns the matrix product of \c *this by the dense matrix, \a matrix */ + template + EIGEN_DEVICE_FUNC + Product + operator*(const MatrixBase &matrix) const + { + return Product(derived(), matrix.derived()); + } + + /** \returns the matrix product of \c *this by the skew symmetric matrix, \a matrix */ + template + EIGEN_DEVICE_FUNC + Product + operator*(const SkewSymmetricBase &matrix) const + { + return Product(derived(), matrix.derived()); + } + + template + using SkewSymmetricProductReturnType = SkewSymmetricWrapper; + + /** \returns the wedge product of \c *this by the skew symmetric matrix \a other + * A wedge B = AB - BA */ + template + EIGEN_DEVICE_FUNC SkewSymmetricProductReturnType wedge( + const SkewSymmetricBase& other) const { + return vector().cross(other.vector()).asSkewSymmetric(); + } + + using SkewSymmetricScaleReturnType = + SkewSymmetricWrapper; + + /** \returns the product of \c *this by the scalar \a scalar */ + EIGEN_DEVICE_FUNC + inline SkewSymmetricScaleReturnType operator*(const Scalar& scalar) const { + return (vector() * scalar).asSkewSymmetric(); + } + + using ScaleSkewSymmetricReturnType = + SkewSymmetricWrapper; + + /** \returns the product of a scalar and the skew symmetric matrix \a other */ + EIGEN_DEVICE_FUNC + friend inline ScaleSkewSymmetricReturnType operator*(const Scalar& scalar, const SkewSymmetricBase& other) { + return (scalar * other.vector()).asSkewSymmetric(); + } + + template + using SkewSymmetricSumReturnType = SkewSymmetricWrapper; + + /** \returns the sum of \c *this and the skew symmetric matrix \a other */ + template + EIGEN_DEVICE_FUNC inline SkewSymmetricSumReturnType operator+( + const SkewSymmetricBase& other) const { + return (vector() + other.vector()).asSkewSymmetric(); + } + + template + using SkewSymmetricDifferenceReturnType = SkewSymmetricWrapper; + + /** \returns the difference of \c *this and the skew symmetric matrix \a other */ + template + EIGEN_DEVICE_FUNC inline SkewSymmetricDifferenceReturnType operator-( + const SkewSymmetricBase& other) const { + return (vector() - other.vector()).asSkewSymmetric(); + } +}; + +/** \class SkewSymmetricMatrix3 + * \ingroup Core_Module + * + * \brief Represents a 3x3 skew symmetric matrix with its storage + * + * \tparam Scalar_ the type of coefficients + * + * \sa class SkewSymmetricBase, class SkewSymmetricWrapper + */ + +namespace internal { +template +struct traits > + : traits > +{ + typedef Matrix SkewSymmetricVectorType; + typedef SkewSymmetricShape StorageKind; + enum { + Flags = LvalueBit | NoPreferredStorageOrderBit | NestByRefBit + }; +}; +} +template +class SkewSymmetricMatrix3 + : public SkewSymmetricBase > +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename internal::traits::SkewSymmetricVectorType SkewSymmetricVectorType; + typedef const SkewSymmetricMatrix3& Nested; + typedef Scalar_ Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + #endif + + protected: + + SkewSymmetricVectorType m_vector; + + public: + + /** const version of vector(). */ + EIGEN_DEVICE_FUNC + inline const SkewSymmetricVectorType& vector() const { return m_vector; } + /** \returns a reference to the stored vector of coefficients. 
*/ + EIGEN_DEVICE_FUNC + inline SkewSymmetricVectorType& vector() { return m_vector; } + + /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC + inline SkewSymmetricMatrix3() {} + + /** Constructor from three scalars */ + EIGEN_DEVICE_FUNC + inline SkewSymmetricMatrix3(const Scalar& x, const Scalar& y, const Scalar& z) : m_vector(x,y,z) {} + + /** \brief Constructs a SkewSymmetricMatrix3 from an r-value vector type */ + EIGEN_DEVICE_FUNC + explicit inline SkewSymmetricMatrix3(SkewSymmetricVectorType&& vec) : m_vector(std::move(vec)) {} + + /** generic constructor from expression of the coefficients */ + template + EIGEN_DEVICE_FUNC + explicit inline SkewSymmetricMatrix3(const MatrixBase& other) : m_vector(other) + {} + + /** Copy constructor. */ + template + EIGEN_DEVICE_FUNC + inline SkewSymmetricMatrix3(const SkewSymmetricBase& other) : m_vector(other.vector()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */ + inline SkewSymmetricMatrix3(const SkewSymmetricMatrix3& other) : m_vector(other.vector()) {} + #endif + + /** Copy operator. */ + template + EIGEN_DEVICE_FUNC + SkewSymmetricMatrix3& operator=(const SkewSymmetricBase& other) + { + m_vector = other.vector(); + return *this; + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_DEVICE_FUNC + SkewSymmetricMatrix3& operator=(const SkewSymmetricMatrix3& other) + { + m_vector = other.vector(); + return *this; + } + #endif + + typedef SkewSymmetricWrapper, SkewSymmetricVectorType>> + InitializeReturnType; + + /** Initializes a skew symmetric matrix with coefficients set to zero */ + EIGEN_DEVICE_FUNC + static InitializeReturnType Zero() { return SkewSymmetricVectorType::Zero().asSkewSymmetric(); } + + /** Sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC + inline void setZero() { m_vector.setZero(); } +}; + +/** \class SkewSymmetricWrapper + * \ingroup Core_Module + * + * \brief Expression of a skew symmetric matrix + * + * \tparam SkewSymmetricVectorType_ the type of the vector of coefficients + * + * This class is an expression of a skew symmetric matrix, but not storing its own vector of coefficients, + * instead wrapping an existing vector expression. It is the return type of MatrixBase::asSkewSymmetric() + * and most of the time this is the only way that it is used. 
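Combined with the Rodrigues-formula exponential() defined above, the wrapper gives a compact axis-angle-to-rotation conversion. A sketch chaining the new API (the AngleAxis comparison is illustrative and assumes the Geometry module):

```cpp
#include <Eigen/Dense>

int main() {
  const double angle = 1.0471975511965976;  // pi / 3
  const Eigen::Vector3d axis = Eigen::Vector3d(1.0, 1.0, 0.0).normalized();
  // exp(angle * [axis]x) is the rotation by 'angle' about 'axis' (Rodrigues' formula).
  Eigen::Matrix3d R = (angle * axis).asSkewSymmetric().exponential();
  Eigen::Matrix3d Rref = Eigen::AngleAxisd(angle, axis).toRotationMatrix();
  return R.isApprox(Rref) ? 0 : 1;
}
```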
+ * + * \sa class SkewSymmetricMatrix3, class SkewSymmetricBase, MatrixBase::asSkewSymmetric() + */ + +namespace internal { +template +struct traits > +{ + typedef SkewSymmetricVectorType_ SkewSymmetricVectorType; + typedef typename SkewSymmetricVectorType::Scalar Scalar; + typedef typename SkewSymmetricVectorType::StorageIndex StorageIndex; + typedef SkewSymmetricShape StorageKind; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + ColsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit + }; +}; +} + +template +class SkewSymmetricWrapper + : public SkewSymmetricBase >, internal::no_assignment_operator +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef SkewSymmetricVectorType_ SkewSymmetricVectorType; + typedef SkewSymmetricWrapper Nested; + #endif + + /** Constructor from expression of coefficients to wrap. */ + EIGEN_DEVICE_FUNC + explicit inline SkewSymmetricWrapper(SkewSymmetricVectorType& a_vector) : m_vector(a_vector) {} + + /** \returns a const reference to the wrapped expression of coefficients. */ + EIGEN_DEVICE_FUNC + const SkewSymmetricVectorType& vector() const { return m_vector; } + + protected: + typename SkewSymmetricVectorType::Nested m_vector; +}; + +/** \returns a pseudo-expression of a skew symmetric matrix with *this as vector of coefficients + * + * \only_for_vectors + * + * \sa class SkewSymmetricWrapper, class SkewSymmetricMatrix3, vector(), isSkewSymmetric() + **/ +template +EIGEN_DEVICE_FUNC inline const SkewSymmetricWrapper +MatrixBase::asSkewSymmetric() const +{ + return SkewSymmetricWrapper(derived()); +} + +/** \returns true if *this is approximately equal to a skew symmetric matrix, + * within the precision given by \a prec. + */ +template +bool MatrixBase::isSkewSymmetric(const RealScalar& prec) const +{ + if(cols() != rows()) return false; + return (this->transpose() + *this).isZero(prec); +} + +/** \returns the matrix product of \c *this by the skew symmetric matrix \a skew.
+ */ +template +template +EIGEN_DEVICE_FUNC inline const Product +MatrixBase::operator*(const SkewSymmetricBase &skew) const +{ + return Product(derived(), skew.derived()); +} + +namespace internal { + +template<> struct storage_kind_to_shape { typedef SkewSymmetricShape Shape; }; + +struct SkewSymmetric2Dense {}; + +template<> struct AssignmentKind { typedef SkewSymmetric2Dense Kind; }; + +// SkewSymmetric matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + if((dst.rows()!=3) || (dst.cols()!=3)) { + dst.resize(3, 3); + } + dst.diagonal().setZero(); + const typename SrcXprType::SkewSymmetricVectorType v = src.vector(); + dst(0, 1) = -v(2); + dst(1, 0) = v(2); + dst(0, 2) = v(1); + dst(2, 0) = -v(1); + dst(1, 2) = -v(0); + dst(2, 1) = v(0); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.vector() += src.vector(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.vector() -= src.vector(); } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SKEWSYMMETRICMATRIX3_H diff --git a/libs/eigen/Eigen/src/Core/Solve.h b/libs/eigen/Eigen/src/Core/Solve.h index 23d5cb7..f77eac9 100644 --- a/libs/eigen/Eigen/src/Core/Solve.h +++ b/libs/eigen/Eigen/src/Core/Solve.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SOLVE_H #define EIGEN_SOLVE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template class SolveImpl; @@ -77,7 +79,7 @@ public: protected: const Decomposition &m_dec; - const RhsType &m_rhs; + const typename internal::ref_selector::type m_rhs; }; @@ -123,7 +125,7 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve) : m_result(solve.rows(), solve.cols()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); solve.dec()._solve_impl(solve.rhs(), m_result); } diff --git a/libs/eigen/Eigen/src/Core/SolveTriangular.h b/libs/eigen/Eigen/src/Core/SolveTriangular.h index dfbf995..71d6f85 100644 --- a/libs/eigen/Eigen/src/Core/SolveTriangular.h +++ b/libs/eigen/Eigen/src/Core/SolveTriangular.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SOLVETRIANGULAR_H #define EIGEN_SOLVETRIANGULAR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -87,7 +89,7 @@ struct triangular_solver_selector static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs) { - typename internal::add_const_on_value_type::type actualLhs = LhsProductTraits::extract(lhs); + add_const_on_value_type_t actualLhs = LhsProductTraits::extract(lhs); const Index size = lhs.rows(); const Index othersize = Side==OnTheLeft? 
rhs.cols() : rhs.rows(); @@ -174,11 +176,11 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl::solveInPlace(c return; enum { copy = (internal::traits::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1}; - typedef typename internal::conditional::type, OtherDerived&>::type OtherCopy; + typedef std::conditional_t::type, OtherDerived&> OtherCopy; OtherCopy otherCopy(other); - internal::triangular_solver_selector::type, + internal::triangular_solver_selector, Side, Mode>::run(derived().nestedExpression(), otherCopy); if (copy) @@ -206,7 +208,7 @@ struct traits > template struct triangular_solve_retval : public ReturnByValue > { - typedef typename remove_all::type RhsNestedCleaned; + typedef remove_all_t RhsNestedCleaned; typedef ReturnByValue Base; triangular_solve_retval(const TriangularType& tri, const Rhs& rhs) diff --git a/libs/eigen/Eigen/src/Core/SolverBase.h b/libs/eigen/Eigen/src/Core/SolverBase.h index 5014610..7396e04 100644 --- a/libs/eigen/Eigen/src/Core/SolverBase.h +++ b/libs/eigen/Eigen/src/Core/SolverBase.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SOLVERBASE_H #define EIGEN_SOLVERBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -28,7 +30,7 @@ struct solve_assertion > template static void run(const type& transpose, const Rhs& b) { - internal::solve_assertion::type>::template run(transpose.nestedExpression(), b); + internal::solve_assertion>::template run(transpose.nestedExpression(), b); } }; @@ -40,7 +42,7 @@ struct solve_assertion template static void run(const type& adjoint, const Rhs& b) { - internal::solve_assertion >::type>::template run(adjoint.nestedExpression(), b); + internal::solve_assertion >>::template run(adjoint.nestedExpression(), b); } }; } // end namespace internal @@ -79,12 +81,11 @@ class SolverBase : public EigenBase enum { RowsAtCompileTime = internal::traits::RowsAtCompileTime, ColsAtCompileTime = internal::traits::ColsAtCompileTime, - SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, - internal::traits::ColsAtCompileTime>::ret), + SizeAtCompileTime = (internal::size_of_xpr_at_compile_time::ret), MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, - MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime>::ret), + MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime), IsVectorAtCompileTime = internal::traits::MaxRowsAtCompileTime == 1 || internal::traits::MaxColsAtCompileTime == 1, NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2 @@ -105,12 +106,12 @@ class SolverBase : public EigenBase inline const Solve solve(const MatrixBase& b) const { - internal::solve_assertion::type>::template run(derived(), b); + internal::solve_assertion>::template run(derived(), b); return Solve(derived(), b.derived()); } /** \internal the return type of transpose() */ - typedef typename internal::add_const >::type ConstTransposeReturnType; + typedef Transpose ConstTransposeReturnType; /** \returns an expression of the transposed of the factored matrix. 
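The canonical snippet for this (assuming some SolverBase-derived decomposition; PartialPivLU is used here just for concreteness):

```cpp
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::VectorXd b = Eigen::VectorXd::Random(4);
  Eigen::PartialPivLU<Eigen::MatrixXd> dec(A);
  // Solves A^T x = b by reusing the existing factorization of A;
  // no second decomposition is computed.
  Eigen::VectorXd x = dec.transpose().solve(b);
  return (A.transpose() * x).isApprox(b) ? 0 : 1;
}
```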
* * A typical usage is to solve for the transposed problem A^T x = b: @@ -118,16 +119,16 @@ class SolverBase : public EigenBase * * \sa adjoint(), solve() */ - inline ConstTransposeReturnType transpose() const + inline const ConstTransposeReturnType transpose() const { return ConstTransposeReturnType(derived()); } /** \internal the return type of adjoint() */ - typedef typename internal::conditional::IsComplex, - CwiseUnaryOp, ConstTransposeReturnType>, - ConstTransposeReturnType - >::type AdjointReturnType; + typedef std::conditional_t::IsComplex, + CwiseUnaryOp, const ConstTransposeReturnType>, + const ConstTransposeReturnType + > AdjointReturnType; /** \returns an expression of the adjoint of the factored matrix * * A typical usage is to solve for the adjoint problem A' x = b: @@ -137,7 +138,7 @@ class SolverBase : public EigenBase * * \sa transpose(), solve() */ - inline AdjointReturnType adjoint() const + inline const AdjointReturnType adjoint() const { return AdjointReturnType(derived().transpose()); } diff --git a/libs/eigen/Eigen/src/Core/StableNorm.h b/libs/eigen/Eigen/src/Core/StableNorm.h index 4a3f0cc..a3bc918 100644 --- a/libs/eigen/Eigen/src/Core/StableNorm.h +++ b/libs/eigen/Eigen/src/Core/StableNorm.h @@ -10,6 +10,8 @@ #ifndef EIGEN_STABLENORM_H #define EIGEN_STABLENORM_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -57,7 +59,7 @@ void stable_norm_impl_inner_step(const VectorType &vec, RealScalar& ssq, RealSca const Index blockSize = 4096; typedef typename internal::nested_eval::type VectorTypeCopy; - typedef typename internal::remove_all::type VectorTypeCopyClean; + typedef internal::remove_all_t VectorTypeCopyClean; const VectorTypeCopy copy(vec); enum { @@ -66,8 +68,8 @@ void stable_norm_impl_inner_step(const VectorType &vec, RealScalar& ssq, RealSca ) && (blockSize*sizeof(Scalar)*20) // if we cannot allocate on the stack, then let's not bother about this optimization }; - typedef typename internal::conditional, internal::evaluator::Alignment>, - typename VectorTypeCopyClean::ConstSegmentReturnType>::type SegmentWrapper; + typedef std::conditional_t, internal::evaluator::Alignment>, + typename VectorTypeCopyClean::ConstSegmentReturnType> SegmentWrapper; Index n = vec.size(); Index bi = internal::first_default_aligned(copy); @@ -79,7 +81,7 @@ void stable_norm_impl_inner_step(const VectorType &vec, RealScalar& ssq, RealSca template typename VectorType::RealScalar -stable_norm_impl(const VectorType &vec, typename enable_if::type* = 0 ) +stable_norm_impl(const VectorType &vec, std::enable_if_t* = 0 ) { using std::sqrt; using std::abs; @@ -101,7 +103,7 @@ stable_norm_impl(const VectorType &vec, typename enable_if typename MatrixType::RealScalar -stable_norm_impl(const MatrixType &mat, typename enable_if::type* = 0 ) +stable_norm_impl(const MatrixType &mat, std::enable_if_t* = 0 ) { using std::sqrt; diff --git a/libs/eigen/Eigen/src/Core/StlIterators.h b/libs/eigen/Eigen/src/Core/StlIterators.h index 09041db..d5d3971 100644 --- a/libs/eigen/Eigen/src/Core/StlIterators.h +++ b/libs/eigen/Eigen/src/Core/StlIterators.h @@ -10,6 +10,8 @@ #ifndef EIGEN_STLITERATORS_H #define EIGEN_STLITERATORS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -25,7 +27,7 @@ protected: typedef typename traits::XprType XprType; typedef indexed_based_stl_iterator_base non_const_iterator; typedef indexed_based_stl_iterator_base const_iterator; - typedef typename internal::conditional::value,non_const_iterator,const_iterator>::type 
other_iterator; + typedef std::conditional_t::value,non_const_iterator,const_iterator> other_iterator; // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class: friend class indexed_based_stl_iterator_base; friend class indexed_based_stl_iterator_base; @@ -104,7 +106,7 @@ protected: typedef typename traits::XprType XprType; typedef indexed_based_stl_reverse_iterator_base non_const_iterator; typedef indexed_based_stl_reverse_iterator_base const_iterator; - typedef typename internal::conditional::value,non_const_iterator,const_iterator>::type other_iterator; + typedef std::conditional_t::value,non_const_iterator,const_iterator> other_iterator; // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class: friend class indexed_based_stl_reverse_iterator_base; friend class indexed_based_stl_reverse_iterator_base; @@ -179,18 +181,18 @@ template class pointer_based_stl_iterator { enum { is_lvalue = internal::is_lvalue::value }; - typedef pointer_based_stl_iterator::type> non_const_iterator; - typedef pointer_based_stl_iterator::type> const_iterator; - typedef typename internal::conditional::value,non_const_iterator,const_iterator>::type other_iterator; + typedef pointer_based_stl_iterator> non_const_iterator; + typedef pointer_based_stl_iterator> const_iterator; + typedef std::conditional_t::value,non_const_iterator,const_iterator> other_iterator; // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class: - friend class pointer_based_stl_iterator::type>; - friend class pointer_based_stl_iterator::type>; + friend class pointer_based_stl_iterator>; + friend class pointer_based_stl_iterator>; public: typedef Index difference_type; typedef typename XprType::Scalar value_type; typedef std::random_access_iterator_tag iterator_category; - typedef typename internal::conditional::type pointer; - typedef typename internal::conditional::type reference; + typedef std::conditional_t pointer; + typedef std::conditional_t reference; pointer_based_stl_iterator() EIGEN_NO_THROW : m_ptr(0) {} @@ -256,12 +258,12 @@ protected: internal::variable_if_dynamic m_incr; }; -template -struct indexed_based_stl_iterator_traits > +template +struct indexed_based_stl_iterator_traits > { - typedef _XprType XprType; - typedef generic_randaccess_stl_iterator::type> non_const_iterator; - typedef generic_randaccess_stl_iterator::type> const_iterator; + typedef XprType_ XprType; + typedef generic_randaccess_stl_iterator> non_const_iterator; + typedef generic_randaccess_stl_iterator> const_iterator; }; template @@ -283,13 +285,13 @@ protected: // TODO currently const Transpose/Reshape expressions never returns const references, // so lets return by value too. 
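In user code these iterator adaptors surface through begin()/end() and through iteration over rowwise()/colwise(); a small sketch of what they enable (Eigen 3.4-style API, values illustrative):

```cpp
#include <algorithm>
#include <Eigen/Dense>

int main() {
  Eigen::VectorXd v(5);
  v << 3, 1, 4, 1, 5;
  // pointer_based_stl_iterator backs begin()/end() on direct-access vectors,
  // so STL algorithms apply directly.
  std::sort(v.begin(), v.end());
  // subvector_stl_iterator backs iteration over colwise()/rowwise():
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 4);
  for (auto col : m.colwise()) col.normalize();  // each col references m's data
}
```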
- //typedef typename internal::conditional::type read_only_ref_t; + //typedef std::conditional_t read_only_ref_t; typedef const value_type read_only_ref_t; public: - typedef typename internal::conditional::type pointer; - typedef typename internal::conditional::type reference; + typedef std::conditional_t pointer; + typedef std::conditional_t reference; generic_randaccess_stl_iterator() : Base() {} generic_randaccess_stl_iterator(XprType& xpr, Index index) : Base(xpr,index) {} @@ -301,12 +303,12 @@ public: pointer operator->() const { return &((*mp_xpr)(m_index)); } }; -template -struct indexed_based_stl_iterator_traits > +template +struct indexed_based_stl_iterator_traits > { - typedef _XprType XprType; - typedef subvector_stl_iterator::type, Direction> non_const_iterator; - typedef subvector_stl_iterator::type, Direction> const_iterator; + typedef XprType_ XprType; + typedef subvector_stl_iterator, Direction> non_const_iterator; + typedef subvector_stl_iterator, Direction> const_iterator; }; template @@ -320,12 +322,12 @@ protected: using Base::m_index; using Base::mp_xpr; - typedef typename internal::conditional::type SubVectorType; - typedef typename internal::conditional::type ConstSubVectorType; + typedef std::conditional_t SubVectorType; + typedef std::conditional_t ConstSubVectorType; public: - typedef typename internal::conditional::type reference; + typedef std::conditional_t reference; typedef typename reference::PlainObject value_type; private: @@ -349,12 +351,12 @@ public: pointer operator->() const { return (*mp_xpr).template subVector(m_index); } }; -template -struct indexed_based_stl_iterator_traits > +template +struct indexed_based_stl_iterator_traits > { - typedef _XprType XprType; - typedef subvector_stl_reverse_iterator::type, Direction> non_const_iterator; - typedef subvector_stl_reverse_iterator::type, Direction> const_iterator; + typedef XprType_ XprType; + typedef subvector_stl_reverse_iterator, Direction> non_const_iterator; + typedef subvector_stl_reverse_iterator, Direction> const_iterator; }; template @@ -368,12 +370,12 @@ protected: using Base::m_index; using Base::mp_xpr; - typedef typename internal::conditional::type SubVectorType; - typedef typename internal::conditional::type ConstSubVectorType; + typedef std::conditional_t SubVectorType; + typedef std::conditional_t ConstSubVectorType; public: - typedef typename internal::conditional::type reference; + typedef std::conditional_t reference; typedef typename reference::PlainObject value_type; private: diff --git a/libs/eigen/Eigen/src/Core/Stride.h b/libs/eigen/Eigen/src/Core/Stride.h index 6494d51..2832e80 100644 --- a/libs/eigen/Eigen/src/Core/Stride.h +++ b/libs/eigen/Eigen/src/Core/Stride.h @@ -10,6 +10,8 @@ #ifndef EIGEN_STRIDE_H #define EIGEN_STRIDE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class Stride @@ -31,27 +33,31 @@ namespace Eigen { * arguments to the constructor. * * Indeed, this class takes two template parameters: - * \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime. - * \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime. + * \tparam OuterStrideAtCompileTime_ the outer stride, or Dynamic if you want to specify it at runtime. + * \tparam InnerStrideAtCompileTime_ the inner stride, or Dynamic if you want to specify it at runtime. 
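An inline sketch of a strided Map before the shipped example below (array contents are illustrative):

```cpp
#include <Eigen/Dense>

int main() {
  double data[12];
  for (int i = 0; i < 12; ++i) data[i] = i;
  // View every other entry of 'data' as a 2x3 column-major matrix:
  // outer stride 4 (pointer increment between columns),
  // inner stride 2 (pointer increment between rows within a column).
  Eigen::Map<Eigen::MatrixXd, 0, Eigen::Stride<4, 2>> m(data, 2, 3);
  return m(1, 2) == data[10] ? 0 : 1;  // element (1,2) sits at 2*4 + 1*2 == 10
}
```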
* * Here is an example: * \include Map_general_stride.cpp * Output: \verbinclude Map_general_stride.out * - * Both strides can be negative, however, a negative stride of -1 cannot be specified at compiletime + * Both strides can be negative. However, a negative stride of -1 cannot be specified at compile time * because of the ambiguity with Dynamic which is defined to -1 (historically, negative strides were * not allowed). * + * Note that for compile-time vectors (ColsAtCompileTime==1 or RowsAtCompileTime==1), + * the inner stride is the pointer increment between two consecutive elements, + * regardless of storage layout. + * * \sa class InnerStride, class OuterStride, \ref TopicStorageOrders */ -template +template class Stride { public: typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 enum { - InnerStrideAtCompileTime = _InnerStrideAtCompileTime, - OuterStrideAtCompileTime = _OuterStrideAtCompileTime + InnerStrideAtCompileTime = InnerStrideAtCompileTime_, + OuterStrideAtCompileTime = OuterStrideAtCompileTime_ }; /** Default constructor, for use when strides are fixed at compile time */ diff --git a/libs/eigen/Eigen/src/Core/Swap.h b/libs/eigen/Eigen/src/Core/Swap.h index 180a4e5..b2e7511 100644 --- a/libs/eigen/Eigen/src/Core/Swap.h +++ b/libs/eigen/Eigen/src/Core/Swap.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SWAP_H #define EIGEN_SWAP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/Transpose.h b/libs/eigen/Eigen/src/Core/Transpose.h index 2bc658f..74650ef 100644 --- a/libs/eigen/Eigen/src/Core/Transpose.h +++ b/libs/eigen/Eigen/src/Core/Transpose.h @@ -11,6 +11,8 @@ #ifndef EIGEN_TRANSPOSE_H #define EIGEN_TRANSPOSE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -18,7 +20,7 @@ template struct traits > : public traits { typedef typename ref_selector::type MatrixTypeNested; - typedef typename remove_reference::type MatrixTypeNestedPlain; + typedef std::remove_reference_t MatrixTypeNestedPlain; enum { RowsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::RowsAtCompileTime, @@ -58,7 +60,7 @@ template class Transpose typedef typename TransposeImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Transpose(MatrixType& matrix) : m_matrix(matrix) {} @@ -72,12 +74,12 @@ template class Transpose /** \returns the nested expression */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all::type& + const internal::remove_all_t& nestedExpression() const { return m_matrix; } /** \returns the nested expression */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::remove_reference::type& + std::remove_reference_t& nestedExpression() { return m_matrix; } /** \internal */ @@ -130,11 +132,11 @@ template class TransposeImpl EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outerStride() const { return derived().nestedExpression().outerStride(); } - typedef typename internal::conditional< - internal::is_lvalue::value, - Scalar, - const Scalar - >::type ScalarWithConstIfNotLvalue; + typedef std::conditional_t< + internal::is_lvalue::value, + Scalar, + const Scalar + > ScalarWithConstIfNotLvalue; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } @@ -178,7 +180,7 @@ template class TransposeImpl * \sa
transposeInPlace(), adjoint() */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Transpose +typename DenseBase::TransposeReturnType DenseBase::transpose() { return TransposeReturnType(derived()); @@ -191,7 +193,7 @@ DenseBase::transpose() * \sa transposeInPlace(), adjoint() */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -typename DenseBase::ConstTransposeReturnType +const typename DenseBase::ConstTransposeReturnType DenseBase::transpose() const { return ConstTransposeReturnType(derived()); diff --git a/libs/eigen/Eigen/src/Core/Transpositions.h b/libs/eigen/Eigen/src/Core/Transpositions.h index 38a7b01..84a9773 100644 --- a/libs/eigen/Eigen/src/Core/Transpositions.h +++ b/libs/eigen/Eigen/src/Core/Transpositions.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TRANSPOSITIONS_H #define EIGEN_TRANSPOSITIONS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template @@ -113,11 +115,11 @@ class TranspositionsBase }; namespace internal { -template -struct traits > - : traits > +template +struct traits > + : traits > { - typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; + typedef Matrix IndicesType; typedef TranspositionsStorage StorageKind; }; } @@ -151,8 +153,8 @@ struct traits -class Transpositions : public TranspositionsBase > +template +class Transpositions : public TranspositionsBase > { typedef internal::traits Traits; public: @@ -199,19 +201,19 @@ class Transpositions : public TranspositionsBase -struct traits,_PacketAccess> > - : traits > +template +struct traits,PacketAccess_> > + : traits > { - typedef Map, _PacketAccess> IndicesType; - typedef _StorageIndex StorageIndex; + typedef Map, PacketAccess_> IndicesType; + typedef StorageIndex_ StorageIndex; typedef TranspositionsStorage StorageKind; }; } -template -class Map,PacketAccess> - : public TranspositionsBase,PacketAccess> > +template +class Map,PacketAccess> + : public TranspositionsBase,PacketAccess> > { typedef internal::traits Traits; public: @@ -260,17 +262,17 @@ class Map,P }; namespace internal { -template -struct traits > - : traits > +template +struct traits > + : traits > { typedef TranspositionsStorage StorageKind; }; } -template +template class TranspositionsWrapper - : public TranspositionsBase > + : public TranspositionsBase > { typedef internal::traits Traits; public: diff --git a/libs/eigen/Eigen/src/Core/TriangularMatrix.h b/libs/eigen/Eigen/src/Core/TriangularMatrix.h index fdb8bc1..c1bd13a 100644 --- a/libs/eigen/Eigen/src/Core/TriangularMatrix.h +++ b/libs/eigen/Eigen/src/Core/TriangularMatrix.h @@ -11,6 +11,8 @@ #ifndef EIGEN_TRIANGULARMATRIX_H #define EIGEN_TRIANGULARMATRIX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -35,14 +37,13 @@ template class TriangularBase : public EigenBase MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, - SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, - internal::traits::ColsAtCompileTime>::ret), + SizeAtCompileTime = (internal::size_of_xpr_at_compile_time::ret), /**< This is equal to the number of coefficients, i.e. the number of * rows times the number of columns, or to \a Dynamic if this is not * known at compile-time. 
\sa RowsAtCompileTime, ColsAtCompileTime */ - MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime>::ret) + MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime) }; typedef typename internal::traits::Scalar Scalar; @@ -153,8 +154,8 @@ template class TriangularBase : public EigenBase * * \brief Expression of a triangular part in a matrix * - * \param MatrixType the type of the object in which we are taking the triangular part - * \param Mode the kind of triangular matrix expression to construct. Can be #Upper, + * \tparam MatrixType the type of the object in which we are taking the triangular part + * \tparam Mode the kind of triangular matrix expression to construct. Can be #Upper, * #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower. * This is in fact a bit field; it must have either #Upper or #Lower, * and additionally it may have #UnitDiag or #ZeroDiag or neither. @@ -166,39 +167,39 @@ template class TriangularBase : public EigenBase * \sa MatrixBase::triangularView() */ namespace internal { -template -struct traits > : traits +template +struct traits > : traits { typedef typename ref_selector::non_const_type MatrixTypeNested; - typedef typename remove_reference::type MatrixTypeNestedNonRef; - typedef typename remove_all::type MatrixTypeNestedCleaned; + typedef std::remove_reference_t MatrixTypeNestedNonRef; + typedef remove_all_t MatrixTypeNestedCleaned; typedef typename MatrixType::PlainObject FullMatrixType; typedef MatrixType ExpressionType; enum { - Mode = _Mode, + Mode = Mode_, FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) }; }; } -template class TriangularViewImpl; +template class TriangularViewImpl; -template class TriangularView - : public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > +template class TriangularView + : public TriangularViewImpl::StorageKind > { public: - typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base; + typedef TriangularViewImpl::StorageKind > Base; typedef typename internal::traits::Scalar Scalar; - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; protected: typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits::MatrixTypeNestedNonRef MatrixTypeNestedNonRef; - typedef typename internal::remove_all::type MatrixConjugateReturnType; - typedef TriangularView::type, _Mode> ConstTriangularView; + typedef internal::remove_all_t MatrixConjugateReturnType; + typedef TriangularView, Mode_> ConstTriangularView; public: @@ -206,7 +207,7 @@ template class TriangularView typedef typename internal::traits::MatrixTypeNestedCleaned NestedExpression; enum { - Mode = _Mode, + Mode = Mode_, Flags = internal::traits::Flags, TransposeMode = (Mode & Upper ? Lower : 0) | (Mode & Lower ? 
Upper : 0) @@ -247,10 +248,10 @@ template class TriangularView */ template EIGEN_DEVICE_FUNC - inline typename internal::conditional::type + inline std::conditional_t conjugateIf() const { - typedef typename internal::conditional::type ReturnType; + typedef std::conditional_t ReturnType; return ReturnType(m_matrix.template conjugateIf()); } @@ -262,10 +263,10 @@ template class TriangularView typedef TriangularView TransposeReturnType; /** \sa MatrixBase::transpose() */ + template EIGEN_DEVICE_FUNC - inline TransposeReturnType transpose() + inline TransposeReturnType transpose(std::enable_if_t::value, Dummy*> = nullptr) { - EIGEN_STATIC_ASSERT_LVALUE(MatrixType) typename MatrixType::TransposeReturnType tmp(m_matrix); return TransposeReturnType(tmp); } @@ -342,16 +343,17 @@ template class TriangularView * * \sa class TriangularView, MatrixBase::triangularView() */ -template class TriangularViewImpl<_MatrixType,_Mode,Dense> - : public TriangularBase > +template class TriangularViewImpl + : public TriangularBase > { public: - typedef TriangularView<_MatrixType, _Mode> TriangularViewType; + typedef TriangularView TriangularViewType; + typedef TriangularBase Base; typedef typename internal::traits::Scalar Scalar; - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef typename MatrixType::PlainObject DenseMatrixType; typedef DenseMatrixType PlainObject; @@ -362,7 +364,7 @@ template class TriangularViewImpl<_Mat typedef typename internal::traits::StorageKind StorageKind; enum { - Mode = _Mode, + Mode = Mode_, Flags = internal::traits::Flags }; @@ -728,10 +730,10 @@ struct evaluator_traits > template struct unary_evaluator, IndexBased> - : evaluator::type> + : evaluator> { typedef TriangularView XprType; - typedef evaluator::type> Base; + typedef evaluator> Base; EIGEN_DEVICE_FUNC unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {} }; diff --git a/libs/eigen/Eigen/src/Core/VectorBlock.h b/libs/eigen/Eigen/src/Core/VectorBlock.h index 71c5b95..ee28da1 100644 --- a/libs/eigen/Eigen/src/Core/VectorBlock.h +++ b/libs/eigen/Eigen/src/Core/VectorBlock.h @@ -11,6 +11,8 @@ #ifndef EIGEN_VECTORBLOCK_H #define EIGEN_VECTORBLOCK_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -66,6 +68,7 @@ template class VectorBlock }; public: EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock) using Base::operator=; @@ -76,18 +79,14 @@ template class VectorBlock : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start, IsColVector ? size : 1, IsColVector ? 1 : size) - { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock); - } + { } /** Fixed-size constructor */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE VectorBlock(VectorType& vector, Index start) : Base(vector, IsColVector ? start : 0, IsColVector ? 
0 : start) - { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock); - } + { } }; diff --git a/libs/eigen/Eigen/src/Core/VectorwiseOp.h b/libs/eigen/Eigen/src/Core/VectorwiseOp.h index 870f4f1..b004f76 100644 --- a/libs/eigen/Eigen/src/Core/VectorwiseOp.h +++ b/libs/eigen/Eigen/src/Core/VectorwiseOp.h @@ -11,6 +11,8 @@ #ifndef EIGEN_PARTIAL_REDUX_H #define EIGEN_PARTIAL_REDUX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \class PartialReduxExpr @@ -86,7 +88,6 @@ template struct partial_redux_dummy_func; #define EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER,COST,VECTORIZABLE,BINARYOP) \ template \ struct member_##MEMBER { \ - EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \ typedef ResultType result_type; \ typedef BINARYOP BinaryOp; \ template struct Cost { enum { value = COST }; }; \ @@ -191,7 +192,7 @@ template class VectorwiseOp typedef typename ExpressionType::RealScalar RealScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef typename internal::ref_selector::non_const_type ExpressionTypeNested; - typedef typename internal::remove_all::type ExpressionTypeNestedCleaned; + typedef internal::remove_all_t ExpressionTypeNestedCleaned; template class Functor, typename ReturnScalar=Scalar> struct ReturnType @@ -230,9 +231,9 @@ template class VectorwiseOp typename ExtendedType::Type extendedTo(const DenseBase& other) const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1), + EIGEN_STATIC_ASSERT(internal::check_implication(isVertical, OtherDerived::MaxColsAtCompileTime==1), YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED) - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1), + EIGEN_STATIC_ASSERT(internal::check_implication(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1), YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED) return typename ExtendedType::Type (other.derived(), @@ -253,9 +254,9 @@ template class VectorwiseOp typename OppositeExtendedType::Type extendedToOpposite(const DenseBase& other) const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1), + EIGEN_STATIC_ASSERT(internal::check_implication(isHorizontal, OtherDerived::MaxColsAtCompileTime==1), YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED) - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1), + EIGEN_STATIC_ASSERT(internal::check_implication(isVertical, OtherDerived::MaxRowsAtCompileTime==1), YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED) return typename OppositeExtendedType::Type (other.derived(), @@ -594,7 +595,7 @@ template class VectorwiseOp return m_matrix += extendedTo(other.derived()); } - /** Substracts the vector \a other to each subvector of \c *this */ + /** Subtracts the vector \a other to each subvector of \c *this */ template EIGEN_DEVICE_FUNC ExpressionType& operator-=(const DenseBase& other) @@ -604,7 +605,7 @@ template class VectorwiseOp return m_matrix -= extendedTo(other.derived()); } - /** Multiples each subvector of \c *this by the vector \a other */ + /** Multiplies each subvector of \c *this by the vector \a other */ template EIGEN_DEVICE_FUNC ExpressionType& operator*=(const DenseBase& other) diff --git a/libs/eigen/Eigen/src/Core/Visitor.h b/libs/eigen/Eigen/src/Core/Visitor.h index 00bcca8..e1c17fc 100644 --- a/libs/eigen/Eigen/src/Core/Visitor.h +++ b/libs/eigen/Eigen/src/Core/Visitor.h @@ -10,16 +10,23 @@ #ifndef EIGEN_VISITOR_H #define EIGEN_VISITOR_H +#include "./InternalHeaderCheck.h" + 
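For context on the VectorwiseOp arithmetic operators touched above (the broadcasting operator+=, operator-= and operator*=), a minimal usage sketch against the public Eigen API; this is an editor's illustration, not part of the patch:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd m = Eigen::MatrixXd::Ones(3, 4);
  Eigen::Vector3d v(0, 10, 20);
  // Broadcasts v to every column of m. Passing a row vector here would hit
  // the YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED static
  // assertion, now spelled with internal::check_implication as shown above.
  m.colwise() += v;
  std::cout << m << "\n";  // every column is now (1, 11, 21)
}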
namespace Eigen { namespace internal { +template::PacketAccess)> +struct visitor_impl; + template -struct visitor_impl +struct visitor_impl { enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = Derived::IsRowMajor ? (UnrollCount-1) % Derived::ColsAtCompileTime + : (UnrollCount-1) / Derived::RowsAtCompileTime, + row = Derived::IsRowMajor ? (UnrollCount-1) / Derived::ColsAtCompileTime + : (UnrollCount-1) % Derived::RowsAtCompileTime }; EIGEN_DEVICE_FUNC @@ -31,7 +38,7 @@ struct visitor_impl }; template -struct visitor_impl +struct visitor_impl { EIGEN_DEVICE_FUNC static inline void run(const Derived &mat, Visitor& visitor) @@ -42,24 +49,73 @@ struct visitor_impl // This specialization enables visitors on empty matrices at compile-time template -struct visitor_impl { +struct visitor_impl { EIGEN_DEVICE_FUNC static inline void run(const Derived &/*mat*/, Visitor& /*visitor*/) {} }; template -struct visitor_impl +struct visitor_impl { EIGEN_DEVICE_FUNC static inline void run(const Derived& mat, Visitor& visitor) { visitor.init(mat.coeff(0,0), 0, 0); - for(Index i = 1; i < mat.rows(); ++i) - visitor(mat.coeff(i, 0), i, 0); - for(Index j = 1; j < mat.cols(); ++j) - for(Index i = 0; i < mat.rows(); ++i) - visitor(mat.coeff(i, j), i, j); + if (Derived::IsRowMajor) { + for(Index i = 1; i < mat.cols(); ++i) { + visitor(mat.coeff(0, i), 0, i); + } + for(Index j = 1; j < mat.rows(); ++j) { + for(Index i = 0; i < mat.cols(); ++i) { + visitor(mat.coeff(j, i), j, i); + } + } + } else { + for(Index i = 1; i < mat.rows(); ++i) { + visitor(mat.coeff(i, 0), i, 0); + } + for(Index j = 1; j < mat.cols(); ++j) { + for(Index i = 0; i < mat.rows(); ++i) { + visitor(mat.coeff(i, j), i, j); + } + } + } + } +}; + +template +struct visitor_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type Packet; + + EIGEN_DEVICE_FUNC + static inline void run(const Derived& mat, Visitor& visitor) + { + const Index PacketSize = packet_traits::size; + visitor.init(mat.coeff(0,0), 0, 0); + if (Derived::IsRowMajor) { + for(Index i = 0; i < mat.rows(); ++i) { + Index j = i == 0 ? 1 : 0; + for(; j+PacketSize-1 < mat.cols(); j += PacketSize) { + Packet p = mat.packet(i, j); + visitor.packet(p, i, j); + } + for(; j < mat.cols(); ++j) + visitor(mat.coeff(i, j), i, j); + } + } else { + for(Index j = 0; j < mat.cols(); ++j) { + Index i = j == 0 ? 
1 : 0; + for(; i+PacketSize-1 < mat.rows(); i += PacketSize) { + Packet p = mat.packet(i, j); + visitor.packet(p, i, j); + } + for(; i < mat.rows(); ++i) + visitor(mat.coeff(i, j), i, j); + } + } } }; @@ -68,28 +124,38 @@ template class visitor_evaluator { public: - EIGEN_DEVICE_FUNC - explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef internal::evaluator Evaluator; enum { + PacketAccess = Evaluator::Flags & PacketAccessBit, + IsRowMajor = XprType::IsRowMajor, RowsAtCompileTime = XprType::RowsAtCompileTime, - CoeffReadCost = internal::evaluator::CoeffReadCost + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = Evaluator::CoeffReadCost }; + + EIGEN_DEVICE_FUNC + explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) { } + + typedef typename XprType::Scalar Scalar; + typedef std::remove_const_t CoeffReturnType; + typedef std::remove_const_t PacketReturnType; + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_xpr.size(); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_evaluator.coeff(row, col); } + EIGEN_DEVICE_FUNC PacketReturnType packet(Index row, Index col) const + { return m_evaluator.template packet(row, col); } protected: - internal::evaluator m_evaluator; + Evaluator m_evaluator; const XprType &m_xpr; }; + } // end namespace internal /** Applies the visitor \a visitor to the whole coefficients of the matrix or vector. 
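Since the doc line above refers to DenseBase::visit(), here is a self-contained example of the visitor contract it expects: init() for coefficient (0,0) and operator() for all remaining coefficients. A scalar-only visitor like this one keeps the coefficient-wise path, because the default functor_traits do not advertise PacketAccess (editor's sketch, public API only):

#include <Eigen/Dense>
#include <cmath>
#include <iostream>

// Tracks the coefficient of largest magnitude and its location.
struct AbsMaxVisitor {
  double res;
  Eigen::Index row, col;
  void init(const double& v, Eigen::Index i, Eigen::Index j) {
    res = std::abs(v); row = i; col = j;
  }
  void operator()(const double& v, Eigen::Index i, Eigen::Index j) {
    if (std::abs(v) > res) { res = std::abs(v); row = i; col = j; }
  }
};

int main() {
  Eigen::Matrix3d m = Eigen::Matrix3d::Random();
  AbsMaxVisitor vis;
  m.visit(vis);
  std::cout << "|max| = " << vis.res
            << " at (" << vis.row << ", " << vis.col << ")\n";
}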
@@ -152,123 +218,131 @@ struct coeff_visitor } }; -/** \internal - * \brief Visitor computing the min coefficient with its value and coordinates - * - * \sa DenseBase::minCoeff(Index*, Index*) - */ -template -struct min_coeff_visitor : coeff_visitor -{ - typedef typename Derived::Scalar Scalar; - EIGEN_DEVICE_FUNC - void operator() (const Scalar& value, Index i, Index j) - { - if(value < this->res) - { - this->res = value; - this->row = i; - this->col = j; - } - } -}; -template -struct min_coeff_visitor : coeff_visitor -{ - typedef typename Derived::Scalar Scalar; - EIGEN_DEVICE_FUNC - void operator() (const Scalar& value, Index i, Index j) - { - if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value < this->res)) - { - this->res = value; - this->row = i; - this->col = j; - } - } -}; - -template -struct min_coeff_visitor : coeff_visitor -{ - typedef typename Derived::Scalar Scalar; - EIGEN_DEVICE_FUNC - void operator() (const Scalar& value, Index i, Index j) - { - if((numext::isnan)(value) || value < this->res) - { - this->res = value; - this->row = i; - this->col = j; - } - } +template +struct minmax_compare { + typedef typename packet_traits::type Packet; + static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a < b; } + static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_min(p);} }; template - struct functor_traits > { +struct minmax_compare { + typedef typename packet_traits::type Packet; + static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a > b; } + static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_max(p);} +}; + +template +struct minmax_coeff_visitor : coeff_visitor +{ + using Scalar = typename Derived::Scalar; + using Packet = typename packet_traits::type; + using Comparator = minmax_compare; + + EIGEN_DEVICE_FUNC inline + void operator() (const Scalar& value, Index i, Index j) + { + if(Comparator::compare(value, this->res)) { + this->res = value; + this->row = i; + this->col = j; + } + } + + EIGEN_DEVICE_FUNC inline + void packet(const Packet& p, Index i, Index j) { + const Index PacketSize = packet_traits::size; + Scalar value = Comparator::predux(p); + if (Comparator::compare(value, this->res)) { + const Packet range = preverse(plset(Scalar(1))); + Packet mask = pcmp_eq(pset1(value), p); + Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); + this->res = value; + this->row = Derived::IsRowMajor ? i : i + max_idx;; + this->col = Derived::IsRowMajor ? j + max_idx : j; + } + } +}; + +// Suppress NaN. The only case in which we return NaN is if the matrix is all NaN, in which case, +// the row=0, col=0 is returned for the location. +template +struct minmax_coeff_visitor : coeff_visitor +{ + typedef typename Derived::Scalar Scalar; + using Packet = typename packet_traits::type; + using Comparator = minmax_compare; + + EIGEN_DEVICE_FUNC inline + void operator() (const Scalar& value, Index i, Index j) + { + if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) { + this->res = value; + this->row = i; + this->col = j; + } + } + + EIGEN_DEVICE_FUNC inline + void packet(const Packet& p, Index i, Index j) { + const Index PacketSize = packet_traits::size; + Scalar value = Comparator::predux(p); + if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) { + const Packet range = preverse(plset(Scalar(1))); + /* mask will be zero for NaNs, so they will be ignored. 
*/ + Packet mask = pcmp_eq(pset1(value), p); + Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); + this->res = value; + this->row = Derived::IsRowMajor ? i : i + max_idx;; + this->col = Derived::IsRowMajor ? j + max_idx : j; + } + } + +}; + +// Propagate NaN. If the matrix contains NaN, the location of the first NaN will be returned in +// row and col. +template +struct minmax_coeff_visitor : coeff_visitor +{ + typedef typename Derived::Scalar Scalar; + using Packet = typename packet_traits::type; + using Comparator = minmax_compare; + + EIGEN_DEVICE_FUNC inline + void operator() (const Scalar& value, Index i, Index j) + { + const bool value_is_nan = (numext::isnan)(value); + if ((value_is_nan && !(numext::isnan)(this->res)) || Comparator::compare(value, this->res)) { + this->res = value; + this->row = i; + this->col = j; + } + } + + EIGEN_DEVICE_FUNC inline + void packet(const Packet& p, Index i, Index j) { + const Index PacketSize = packet_traits::size; + Scalar value = Comparator::predux(p); + const bool value_is_nan = (numext::isnan)(value); + if ((value_is_nan && !(numext::isnan)(this->res)) || Comparator::compare(value, this->res)) { + const Packet range = preverse(plset(Scalar(1))); + // If the value is NaN, pick the first position of a NaN, otherwise pick the first extremal value. + Packet mask = value_is_nan ? pnot(pcmp_eq(p, p)) : pcmp_eq(pset1(value), p); + Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); + this->res = value; + this->row = Derived::IsRowMajor ? i : i + max_idx;; + this->col = Derived::IsRowMajor ? j + max_idx : j; + } + } +}; + +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost - }; -}; - -/** \internal - * \brief Visitor computing the max coefficient with its value and coordinates - * - * \sa DenseBase::maxCoeff(Index*, Index*) - */ -template -struct max_coeff_visitor : coeff_visitor -{ - typedef typename Derived::Scalar Scalar; - EIGEN_DEVICE_FUNC - void operator() (const Scalar& value, Index i, Index j) - { - if(value > this->res) - { - this->res = value; - this->row = i; - this->col = j; - } - } -}; - -template -struct max_coeff_visitor : coeff_visitor -{ - typedef typename Derived::Scalar Scalar; - EIGEN_DEVICE_FUNC - void operator() (const Scalar& value, Index i, Index j) - { - if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value > this->res)) - { - this->res = value; - this->row = i; - this->col = j; - } - } -}; - -template -struct max_coeff_visitor : coeff_visitor -{ - typedef typename Derived::Scalar Scalar; - EIGEN_DEVICE_FUNC - void operator() (const Scalar& value, Index i, Index j) - { - if((numext::isnan)(value) || value > this->res) - { - this->res = value; - this->row = i; - this->col = j; - } - } -}; - -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost + Cost = NumTraits::AddCost, + PacketAccess = true }; }; @@ -293,7 +367,7 @@ DenseBase::minCoeff(IndexType* rowId, IndexType* colId) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); - internal::min_coeff_visitor minVisitor; + internal::minmax_coeff_visitor minVisitor; this->visit(minVisitor); *rowId = minVisitor.row; if (colId) *colId = minVisitor.col; @@ -319,7 +393,7 @@ DenseBase::minCoeff(IndexType* index) const eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - internal::min_coeff_visitor minVisitor; + internal::minmax_coeff_visitor minVisitor; this->visit(minVisitor); *index 
= IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row); return minVisitor.res; @@ -344,7 +418,7 @@ DenseBase::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); - internal::max_coeff_visitor maxVisitor; + internal::minmax_coeff_visitor maxVisitor; this->visit(maxVisitor); *rowPtr = maxVisitor.row; if (colPtr) *colPtr = maxVisitor.col; @@ -370,7 +444,7 @@ DenseBase::maxCoeff(IndexType* index) const eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - internal::max_coeff_visitor maxVisitor; + internal::minmax_coeff_visitor maxVisitor; this->visit(maxVisitor); *index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row; return maxVisitor.res; diff --git a/libs/eigen/Eigen/src/Core/arch/AVX/Complex.h b/libs/eigen/Eigen/src/Core/arch/AVX/Complex.h index ab7bd6c..3abb5bd 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX/Complex.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX/Complex.h @@ -10,6 +10,8 @@ #ifndef EIGEN_COMPLEX_AVX_H #define EIGEN_COMPLEX_AVX_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -99,7 +101,9 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploadu(const std::complex EIGEN_STRONG_INLINE Packet4cf pset1(const std::complex& from) { - return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from))); + const float re = std::real(from); + const float im = std::imag(from); + return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re)); } template<> EIGEN_STRONG_INLINE Packet4cf ploaddup(const std::complex* from) @@ -167,15 +171,12 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P Packet2cf(_mm256_extractf128_ps(a.v, 1)))); } + EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f) template<> EIGEN_STRONG_INLINE Packet4cf pdiv(const Packet4cf& a, const Packet4cf& b) { - Packet4cf num = pmul(a, pconj(b)); - __m256 tmp = _mm256_mul_ps(b.v, b.v); - __m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1); - __m256 denom = _mm256_add_ps(tmp, tmp2); - return Packet4cf(_mm256_div_ps(num.v, denom)); + return pdiv_complex(a, b); } template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip(const Packet4cf& x) @@ -321,10 +322,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d) template<> EIGEN_STRONG_INLINE Packet2cd pdiv(const Packet2cd& a, const Packet2cd& b) { - Packet2cd num = pmul(a, pconj(b)); - __m256d tmp = _mm256_mul_pd(b.v, b.v); - __m256d denom = _mm256_hadd_pd(tmp, tmp); - return Packet2cd(_mm256_div_pd(num.v, denom)); + return pdiv_complex(a, b); } template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip(const Packet2cd& x) diff --git a/libs/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h index 67041c8..cb7d7b8 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -14,52 +14,78 @@ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f psin(const Packet8f& _x) { return psin_float(_x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f pcos(const Packet8f& _x) { return pcos_float(_x); } 
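Stepping back to the minmax_coeff_visitor packet() overloads above: the lane-index trick there is compact but dense. range = preverse(plset(1)) holds {N, N-1, ..., 1}, it is AND-ed with the equality mask, and N - predux_max(...) then recovers the first matching lane. A scalar model of the same arithmetic (editor's illustration, not patch code):

#include <cstdio>

// Lanes where p[k] == value keep range[k] = N - k, all others contribute 0,
// so the reduction is largest at the FIRST match.
int first_match_lane(const float* p, float value, int N) {
  int reduced_max = 0;                               // predux_max accumulator
  for (int k = 0; k < N; ++k) {
    int masked = (p[k] == value) ? (N - k) : 0;      // pand(range, mask), lane k
    if (masked > reduced_max) reduced_max = masked;
  }
  return N - reduced_max;                            // first match (N if none)
}

int main() {
  const float lanes[8] = {3, 1, 4, 1, 5, 9, 2, 6};
  std::printf("%d\n", first_match_lane(lanes, 1.0f, 8));  // prints 1
}

These visitors are what DenseBase::minCoeff(&row, &col) and maxCoeff(&row, &col) dispatch to, as the hunks above show.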
template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f +pasin(const Packet8f& _x) { + return pasin_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f +pacos(const Packet8f& _x) { + return pacos_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f +patan(const Packet8f& _x) { + return patan_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d +patan(const Packet4d& _x) { + return patan_double(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f plog(const Packet8f& _x) { return plog_float(_x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d plog(const Packet4d& _x) { return plog_double(_x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f plog2(const Packet8f& _x) { return plog2_float(_x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d plog2(const Packet4d& _x) { return plog2_double(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f plog1p(const Packet8f& _x) { return generic_plog1p(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f pexpm1(const Packet8f& _x) { return generic_expm1(_x); } @@ -68,110 +94,59 @@ Packet8f pexpm1(const Packet8f& _x) { // "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then // "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1). template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f pexp(const Packet8f& _x) { return pexp_float(_x); } // Hyperbolic Tangent function. template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f ptanh(const Packet8f& _x) { return internal::generic_fast_tanh_float(_x); } // Exponential function for doubles. template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d pexp(const Packet4d& _x) { return pexp_double(_x); } -// Functions for sqrt. -// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step -// of Newton's method, at a cost of 1-2 bits of precision as opposed to the -// exact solution. It does not handle +inf, or denormalized numbers correctly. -// The main advantage of this approach is not just speed, but also the fact that -// it can be inlined and pipelined with other computations, further reducing its -// effective latency. This is similar to Quake3's fast inverse square root. 
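The pexp comment above describes the classic range reduction exp(x) = 2^m * exp(r). A scalar sketch of that decomposition (editor's illustration; the actual kernel evaluates exp(r) with a tuned polynomial rather than std::exp):

#include <cmath>
#include <cstdio>

double exp_via_range_reduction(double x) {
  const double ln2 = 0.6931471805599453;
  double m = std::floor(x / ln2 + 0.5);      // m = floor(x/log(2) + 1/2)
  double r = x - m * ln2;                    // remainder, |r| <= log(2)/2
  return std::ldexp(std::exp(r), (int)m);    // exp(x) = 2^m * exp(r)
}

int main() {
  std::printf("%.15g\n%.15g\n", exp_via_range_reduction(3.7), std::exp(3.7));
}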
-// For detail see here: http://www.beyond3d.com/content/articles/8/ -#if EIGEN_FAST_MATH -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet8f psqrt(const Packet8f& _x) { - Packet8f minus_half_x = pmul(_x, pset1(-0.5f)); - Packet8f denormal_mask = pandnot( - pcmp_lt(_x, pset1((std::numeric_limits::min)())), - pcmp_lt(_x, pzero(_x))); - // Compute approximate reciprocal sqrt. - Packet8f x = _mm256_rsqrt_ps(_x); - // Do a single step of Newton's iteration. - x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1(1.5f))); - // Flush results for denormals to zero. - return pandnot(pmul(_x,x), denormal_mask); -} - -#else - -template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +// Notice that for newer processors, it is counterproductive to use Newton +// iteration for square root. In particular, Skylake and Zen2 processors +// have approximately doubled throughput of the _mm_sqrt_ps instruction +// compared to their predecessors. +template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f psqrt(const Packet8f& _x) { return _mm256_sqrt_ps(_x); } - -#endif - -template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d psqrt(const Packet4d& _x) { return _mm256_sqrt_pd(_x); } + +// Even on Skylake, using Newton iteration is a win for reciprocal square root. #if EIGEN_FAST_MATH -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet8f prsqrt(const Packet8f& _x) { - _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000); - _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f); - _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f); - _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000); - - Packet8f neg_half = pmul(_x, p8f_minus_half); - - // select only the inverse sqrt of positive normal inputs (denormals are - // flushed to zero and cause infs as well). - Packet8f lt_min_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ); - Packet8f inf_mask = _mm256_cmp_ps(_x, p8f_inf, _CMP_EQ_OQ); - Packet8f not_normal_finite_mask = _mm256_or_ps(lt_min_mask, inf_mask); - - // Compute an approximate result using the rsqrt intrinsic. - Packet8f y_approx = _mm256_rsqrt_ps(_x); - - // Do a single step of Newton-Raphson iteration to improve the approximation. - // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n). - // It is essential to evaluate the inner term like this because forming - // y_n^2 may over- or underflow. - Packet8f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p8f_one_point_five)); - - // Select the result of the Newton-Raphson step for positive normal arguments. - // For other arguments, choose the output of the intrinsic. This will - // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(x) = +inf if - // x is zero or a positive denormalized float (equivalent to flushing positive - // denormalized inputs to zero). - return pselect(not_normal_finite_mask, y_approx, y_newton); +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet8f prsqrt(const Packet8f& a) { + // _mm256_rsqrt_ps returns -inf for negative denormals. + // _mm512_rsqrt**_ps returns -NaN for negative denormals. We may want + // consistency here. 
+ // const Packet8f rsqrt = pselect(pcmp_lt(a, pzero(a)), + // pset1(-NumTraits::quiet_NaN()), + // _mm256_rsqrt_ps(a)); + return generic_rsqrt_newton_step::run(a, _mm256_rsqrt_ps(a)); } -#else -template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet8f prsqrt(const Packet8f& _x) { - _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f); - return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(_x)); +template<> EIGEN_STRONG_INLINE Packet8f preciprocal(const Packet8f& a) { + return generic_reciprocal_newton_step::run(a, _mm256_rcp_ps(a)); } + #endif -template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet4d prsqrt(const Packet4d& _x) { - _EIGEN_DECLARE_CONST_Packet4d(one, 1.0); - return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(_x)); -} F16_PACKET_FUNCTION(Packet8f, Packet8h, psin) F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos) @@ -183,6 +158,7 @@ F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp) F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh) F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt) F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt) +F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal) template <> EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) { @@ -207,6 +183,7 @@ BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal) template <> EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) { diff --git a/libs/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/AVX/PacketMath.h index 7fc32fd..0fe830a 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX/PacketMath.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PACKET_MATH_AVX_H #define EIGEN_PACKET_MATH_AVX_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -29,28 +31,29 @@ namespace internal { #endif typedef __m256 Packet8f; -typedef __m256i Packet8i; +typedef eigen_packet_wrapper<__m256i, 0> Packet8i; typedef __m256d Packet4d; +#ifndef EIGEN_VECTORIZE_AVX512FP16 typedef eigen_packet_wrapper<__m128i, 2> Packet8h; +#endif typedef eigen_packet_wrapper<__m128i, 3> Packet8bf; +#ifdef EIGEN_VECTORIZE_AVX2 +// Start from 3 to be compatible with AVX512 +typedef eigen_packet_wrapper<__m256i, 3> Packet4l; +#endif + template<> struct is_arithmetic<__m256> { enum { value = true }; }; template<> struct is_arithmetic<__m256i> { enum { value = true }; }; template<> struct is_arithmetic<__m256d> { enum { value = true }; }; +template<> struct is_arithmetic { enum { value = true }; }; +#ifndef EIGEN_VECTORIZE_AVX512FP16 template<> struct is_arithmetic { enum { value = true }; }; +#endif template<> struct is_arithmetic { enum { value = true }; }; - -#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \ - const Packet8f p8f_##NAME = pset1(X) - -#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \ - const Packet4d p4d_##NAME = pset1(X) - -#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \ - const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1(X)) - -#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \ - const Packet8i p8i_##NAME = pset1(X) +#ifdef EIGEN_VECTORIZE_AVX2 +template<> struct is_arithmetic { enum { value = true }; }; +#endif // Use the packet_traits defined in AVX512/PacketMath.h instead if we're going // to leverage AVX512 instructions. 
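One note on the typedef changes above: __m256i is a single type no matter the element width, so raw typedefs cannot distinguish an int32 packet from an int64 packet during overload resolution; eigen_packet_wrapper adds an integer tag to create distinct types (hence Packet4l starting "from 3 to be compatible with AVX512"). A minimal sketch of the idea, not the actual wrapper (compile with -mavx2):

#include <immintrin.h>

template <typename T, int Tag>
struct packet_wrapper {
  T v;
  packet_wrapper() = default;
  packet_wrapper(const T& x) : v(x) {}
  operator T() const { return v; }  // implicit round-trip to the raw register
};
using PacketI32 = packet_wrapper<__m256i, 0>;  // 8 x int32
using PacketI64 = packet_wrapper<__m256i, 3>;  // 4 x int64

// Same raw register type underneath, yet two distinct overloads:
PacketI32 padd_demo(PacketI32 a, PacketI32 b) { return _mm256_add_epi32(a, b); }
PacketI64 padd_demo(PacketI64 a, PacketI64 b) { return _mm256_add_epi64(a, b); }

int main() { return 0; }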
@@ -67,8 +70,12 @@ template<> struct packet_traits : default_packet_traits HasCmp = 1, HasDiv = 1, + HasReciprocal = EIGEN_FAST_MATH, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasACos = 1, + HasASin = 1, + HasATan = 1, HasLog = 1, HasLog1p = 1, HasExpm1 = 1, @@ -102,6 +109,7 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasATan = 1, HasBlend = 1, HasRound = 1, HasFloor = 1, @@ -196,38 +204,74 @@ struct packet_traits : default_packet_traits { HasNdtri = 1 }; }; + +template<> struct packet_traits : default_packet_traits +{ + typedef Packet8i type; + typedef Packet4i half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + HasCmp = 1, + HasDiv = 1, + size=8 + }; +}; + +#ifdef EIGEN_VECTORIZE_AVX2 +template<> struct packet_traits : default_packet_traits +{ + typedef Packet4l type; + // There is no half-size packet for current Packet4l. + // TODO: support as SSE path. + typedef Packet4l half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + HasCmp = 1, + size=4 + }; +}; +#endif + #endif template<> struct scalar_div_cost { enum { value = 14 }; }; template<> struct scalar_div_cost { enum { value = 16 }; }; -/* Proper support for integers is only provided by AVX2. In the meantime, we'll - use SSE instructions and packets to deal with integers. -template<> struct packet_traits : default_packet_traits -{ - typedef Packet8i type; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size=8 - }; -}; -*/ - template<> struct unpacket_traits { typedef float type; typedef Packet4f half; typedef Packet8i integer_packet; typedef uint8_t mask_t; - enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true}; + enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true +#ifdef EIGEN_VECTORIZE_AVX512 + , masked_fpops_available=true +#endif + }; }; template<> struct unpacket_traits { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; -template<> struct unpacket_traits { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false, masked_store_available=false}; }; -template<> struct unpacket_traits { typedef bfloat16 type; typedef Packet8bf half; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; +template<> struct unpacket_traits { + typedef int type; + typedef Packet4i half; + enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; +}; +#ifdef EIGEN_VECTORIZE_AVX2 +template<> struct unpacket_traits { + typedef int64_t type; + typedef Packet4l half; + enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; +}; +#endif +template<> struct unpacket_traits { + typedef bfloat16 type; + typedef Packet8bf half; + enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; +}; // Helper function for bit packing snippet of low precision comparison. // It packs the flags from 16x16 to 8x16. 
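Back to the prsqrt/preciprocal hunks above: generic_rsqrt_newton_step polishes the hardware estimate with one Newton-Raphson iteration, y' = y*(1.5 - 0.5*x*y*y), roughly doubling its correct bits; the patch keeps this for rsqrt while dropping it for sqrt, where it no longer pays off on Skylake/Zen2-class cores. A scalar model (editor's illustration; the packet version additionally special-cases zero, infinity and denormals):

#include <cstdio>

float rsqrt_newton_step(float x, float y /* rough 1/sqrt(x) estimate */) {
  // Grouped so the estimate is never squared on its own, which the removed
  // comment noted can spuriously over- or underflow.
  return y * (1.5f - (0.5f * x * y) * y);
}

int main() {
  float x = 2.0f, y = 0.70f;                       // crude seed for 1/sqrt(2)
  std::printf("%.7f\n", rsqrt_newton_step(x, y));  // ~0.7070 vs true 0.7071068
}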
@@ -236,6 +280,210 @@ EIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf) { _mm256_extractf128_si256(_mm256_castps_si256(rf), 1)); } +#ifdef EIGEN_VECTORIZE_AVX2 +template <> +EIGEN_STRONG_INLINE Packet4l pset1(const int64_t& from) { + return _mm256_set1_epi64x(from); +} +template <> +EIGEN_STRONG_INLINE Packet4l pzero(const Packet4l& /*a*/) { + return _mm256_setzero_si256(); +} +template <> +EIGEN_STRONG_INLINE Packet4l peven_mask(const Packet4l& /*a*/) { + return _mm256_set_epi64x(0ll, -1ll, 0ll, -1ll); +} +template <> +EIGEN_STRONG_INLINE Packet4l pload1(const int64_t* from) { + return _mm256_set1_epi64x(*from); +} +template <> +EIGEN_STRONG_INLINE Packet4l padd(const Packet4l& a, const Packet4l& b) { + return _mm256_add_epi64(a, b); +} +template <> +EIGEN_STRONG_INLINE Packet4l plset(const int64_t& a) { + return padd(pset1(a), Packet4l(_mm256_set_epi64x(3ll, 2ll, 1ll, 0ll))); +} +template <> +EIGEN_STRONG_INLINE Packet4l psub(const Packet4l& a, const Packet4l& b) { + return _mm256_sub_epi64(a, b); +} +template <> +EIGEN_STRONG_INLINE Packet4l pnegate(const Packet4l& a) { + return psub(pzero(a), a); +} +template <> +EIGEN_STRONG_INLINE Packet4l pconj(const Packet4l& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet4l pcmp_le(const Packet4l& a, const Packet4l& b) { + return _mm256_xor_si256(_mm256_cmpgt_epi64(a, b), _mm256_set1_epi32(-1)); +} +template <> +EIGEN_STRONG_INLINE Packet4l pcmp_lt(const Packet4l& a, const Packet4l& b) { + return _mm256_cmpgt_epi64(b, a); +} +template <> +EIGEN_STRONG_INLINE Packet4l pcmp_eq(const Packet4l& a, const Packet4l& b) { + return _mm256_cmpeq_epi64(a, b); +} +template <> +EIGEN_STRONG_INLINE Packet4l ptrue(const Packet4l& a) { + return _mm256_cmpeq_epi64(a, a); +} +template <> +EIGEN_STRONG_INLINE Packet4l pand(const Packet4l& a, const Packet4l& b) { + return _mm256_and_si256(a, b); +} +template <> +EIGEN_STRONG_INLINE Packet4l por(const Packet4l& a, const Packet4l& b) { + return _mm256_or_si256(a, b); +} +template <> +EIGEN_STRONG_INLINE Packet4l pxor(const Packet4l& a, const Packet4l& b) { + return _mm256_xor_si256(a, b); +} +template <> +EIGEN_STRONG_INLINE Packet4l pandnot(const Packet4l& a, const Packet4l& b) { + return _mm256_andnot_si256(b, a); +} +template +EIGEN_STRONG_INLINE Packet4l plogical_shift_right(Packet4l a) { + return _mm256_srli_epi64(a, N); +} +template +EIGEN_STRONG_INLINE Packet4l plogical_shift_left(Packet4l a) { + return _mm256_slli_epi64(a, N); +} +#ifdef EIGEN_VECTORIZE_AVX512FP16 +template +EIGEN_STRONG_INLINE Packet4l parithmetic_shift_right(Packet4l a) { return _mm256_srai_epi64(a, N); } +#else +template +EIGEN_STRONG_INLINE std::enable_if_t< (N == 0), Packet4l> parithmetic_shift_right(Packet4l a) { + return a; +} +template +EIGEN_STRONG_INLINE std::enable_if_t< (N > 0) && (N < 32), Packet4l> parithmetic_shift_right(Packet4l a) { + __m256i hi_word = _mm256_srai_epi32(a, N); + __m256i lo_word = _mm256_srli_epi64(a, N); + return _mm256_blend_epi32(hi_word, lo_word, 0b01010101); +} +template +EIGEN_STRONG_INLINE std::enable_if_t< (N >= 32) && (N < 63), Packet4l> parithmetic_shift_right(Packet4l a) { + __m256i hi_word = _mm256_srai_epi32(a, 31); + __m256i lo_word = _mm256_shuffle_epi32(_mm256_srai_epi32(a, N - 32), (shuffle_mask<1, 1, 3, 3>::mask)); + return _mm256_blend_epi32(hi_word, lo_word, 0b01010101); +} +template +EIGEN_STRONG_INLINE std::enable_if_t< (N == 63), Packet4l> parithmetic_shift_right(Packet4l a) { + return _mm256_shuffle_epi32(_mm256_srai_epi32(a, 31), (shuffle_mask<1, 1, 3, 3>::mask)); +} 
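The parithmetic_shift_right ladder above synthesizes a 64-bit arithmetic shift from 32-bit AVX2 operations. A portable scalar model of the 0 < N < 32 case (editor's illustration):

#include <cstdint>
#include <cstdio>

int64_t sar64_emulated(int64_t a, int n) {  // assumes 0 < n < 32
  uint64_t u = (uint64_t)a;
  // srai on the high 32-bit word: shifts while replicating the sign bit.
  uint32_t hi = (uint32_t)((int32_t)(u >> 32) >> n);
  // srli on the whole 64-bit lane: supplies the low word, including the bits
  // that cross over from the high word.
  uint32_t lo = (uint32_t)(u >> n);
  // blend: low word from the logical shift, high word from the srai result.
  return (int64_t)(((uint64_t)hi << 32) | lo);
}

int main() {
  std::printf("%lld %lld\n", (long long)sar64_emulated(-1024, 4),
              (long long)(-1024LL >> 4));  // both print -64
}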
+template +EIGEN_STRONG_INLINE std::enable_if_t< (N < 0) || (N > 63), Packet4l> parithmetic_shift_right(Packet4l a) { + return parithmetic_shift_right(a); +} +#endif +template <> +EIGEN_STRONG_INLINE Packet4l pload(const int64_t* from) { + EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast(from)); +} +template <> +EIGEN_STRONG_INLINE Packet4l ploadu(const int64_t* from) { + EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast(from)); +} +// Loads 2 int64_ts from memory a returns the packet {a0, a0, a1, a1} +template <> +EIGEN_STRONG_INLINE Packet4l ploaddup(const int64_t* from) { + const Packet4l a = _mm256_castsi128_si256(_mm_loadu_si128(reinterpret_cast(from))); + return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 1, 0, 1, 2, 3, 2, 3)); +} +template <> +EIGEN_STRONG_INLINE void pstore(int64_t* to, const Packet4l& from) { + EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); +} +template <> +EIGEN_STRONG_INLINE void pstoreu(int64_t* to, const Packet4l& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); +} +template <> +EIGEN_DEVICE_FUNC inline Packet4l pgather(const int64_t* from, Index stride) { + return _mm256_set_epi64x(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]); +} +template <> +EIGEN_DEVICE_FUNC inline void pscatter(int64_t* to, const Packet4l& from, Index stride) { + __m128i low = _mm256_extractf128_si256(from, 0); + to[stride * 0] = _mm_extract_epi64(low, 0); + to[stride * 1] = _mm_extract_epi64(low, 1); + + __m128i high = _mm256_extractf128_si256(from, 1); + to[stride * 2] = _mm_extract_epi64(high, 0); + to[stride * 3] = _mm_extract_epi64(high, 1); +} +template <> +EIGEN_STRONG_INLINE void pstore1(int64_t* to, const int64_t& a) { + Packet4l pa = pset1(a); + pstore(to, pa); +} +template <> +EIGEN_STRONG_INLINE int64_t pfirst(const Packet4l& a) { + return _mm_cvtsi128_si64(_mm256_castsi256_si128(a)); +} +template <> +EIGEN_STRONG_INLINE int64_t predux(const Packet4l& a) { + __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); + return _mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1); +} +#define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M) +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { + __m256d T0 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 15); + __m256d T1 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 0); + __m256d T2 = MM256_SHUFFLE_EPI64(kernel.packet[2], kernel.packet[3], 15); + __m256d T3 = MM256_SHUFFLE_EPI64(kernel.packet[2], kernel.packet[3], 0); + + kernel.packet[1] = _mm256_castpd_si256(_mm256_permute2f128_pd(T0, T2, 32)); + kernel.packet[3] = _mm256_castpd_si256(_mm256_permute2f128_pd(T0, T2, 49)); + kernel.packet[0] = _mm256_castpd_si256(_mm256_permute2f128_pd(T1, T3, 32)); + kernel.packet[2] = _mm256_castpd_si256(_mm256_permute2f128_pd(T1, T3, 49)); +} +template <> +EIGEN_STRONG_INLINE Packet4l pmin(const Packet4l& a, const Packet4l& b) { + __m256i cmp = _mm256_cmpgt_epi64(a, b); + __m256i a_min = _mm256_andnot_si256(cmp, a); + __m256i b_min = _mm256_and_si256(cmp, b); + return Packet4l(_mm256_or_si256(a_min, b_min)); +} +template <> +EIGEN_STRONG_INLINE Packet4l pmax(const Packet4l& a, const Packet4l& b) { + __m256i cmp = _mm256_cmpgt_epi64(a, b); + __m256i a_min = _mm256_and_si256(cmp, a); + __m256i b_min = _mm256_andnot_si256(cmp, b); + return Packet4l(_mm256_or_si256(a_min, b_min)); +} 
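Just below, pmul for Packet4l assembles a 64-bit product from _mm256_mul_epu32, which multiplies the low 32 bits of each 64-bit lane. A scalar model of that decomposition (editor's illustration); the hi*hi partial product is omitted because it only affects bits 64 and up:

#include <cstdint>
#include <cstdio>

uint64_t mul64_from_32(uint64_t a, uint64_t b) {
  uint64_t a_lo = a & 0xFFFFFFFFu, a_hi = a >> 32;
  uint64_t b_lo = b & 0xFFFFFFFFu, b_hi = b >> 32;
  uint64_t mul1 = a_hi * b_lo;          // mul_epu32(upper32_a, b)
  uint64_t mul2 = b_hi * a_lo;          // mul_epu32(upper32_b, a)
  uint64_t mul3 = a_lo * b_lo;          // mul_epu32(a, b)
  return ((mul1 + mul2) << 32) + mul3;  // high + low partial products
}

int main() {
  uint64_t a = 0x123456789ULL, b = 0xABCDEF12ULL;
  std::printf("%d\n", (int)(mul64_from_32(a, b) == a * b));  // prints 1
}

Since the result is exact modulo 2^64, the same routine is also correct for signed int64 lanes under two's complement, which is why one pmul serves Packet4l.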
+template <> +EIGEN_STRONG_INLINE Packet4l pabs(const Packet4l& a) { + Packet4l pz = pzero(a); + Packet4l cmp = _mm256_cmpgt_epi64(a, pz); + return psub(cmp, pxor(a, cmp)); +} +template <> +EIGEN_STRONG_INLINE Packet4l pmul(const Packet4l& a, const Packet4l& b) { + // 64-bit mul requires avx512, so do this with 32-bit multiplication + __m256i upper32_a = _mm256_srli_epi64(a, 32); + __m256i upper32_b = _mm256_srli_epi64(b, 32); + + // upper * lower + __m256i mul1 = _mm256_mul_epu32(upper32_a, b); + __m256i mul2 = _mm256_mul_epu32(upper32_b, a); + // Gives us both upper*upper and lower*lower + __m256i mul3 = _mm256_mul_epu32(a, b); + + __m256i high = _mm256_slli_epi64(_mm256_add_epi64(mul1, mul2), 32); + return _mm256_add_epi64(high, mul3); +} +#endif template<> EIGEN_STRONG_INLINE Packet8f pset1(const float& from) { return _mm256_set1_ps(from); } template<> EIGEN_STRONG_INLINE Packet4d pset1(const double& from) { return _mm256_set1_pd(from); } @@ -256,10 +504,17 @@ template<> EIGEN_STRONG_INLINE Packet4d peven_mask(const Packet4d& /*a*/) { retu template<> EIGEN_STRONG_INLINE Packet8f pload1(const float* from) { return _mm256_broadcast_ss(from); } template<> EIGEN_STRONG_INLINE Packet4d pload1(const double* from) { return _mm256_broadcast_sd(from); } -template<> EIGEN_STRONG_INLINE Packet8f plset(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } -template<> EIGEN_STRONG_INLINE Packet4d plset(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } - template<> EIGEN_STRONG_INLINE Packet8f padd(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); } +#ifdef EIGEN_VECTORIZE_AVX512 +template <> +EIGEN_STRONG_INLINE Packet8f padd(const Packet8f& a, const Packet8f& b, uint8_t umask) { + __mmask16 mask = static_cast<__mmask16>(umask & 0x00FF); + return _mm512_castps512_ps256(_mm512_maskz_add_ps( + mask, + _mm512_castps256_ps512(a), + _mm512_castps256_ps512(b))); +} +#endif template<> EIGEN_STRONG_INLINE Packet4d padd(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet8i padd(const Packet8i& a, const Packet8i& b) { #ifdef EIGEN_VECTORIZE_AVX2 @@ -271,6 +526,10 @@ template<> EIGEN_STRONG_INLINE Packet8i padd(const Packet8i& a, const #endif } +template<> EIGEN_STRONG_INLINE Packet8f plset(const float& a) { return padd(pset1(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } +template<> EIGEN_STRONG_INLINE Packet4d plset(const double& a) { return padd(pset1(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } +template<> EIGEN_STRONG_INLINE Packet8i plset(const int& a) { return padd(pset1(a), (Packet8i)_mm256_set_epi32(7,6,5,4,3,2,1,0)); } + template<> EIGEN_STRONG_INLINE Packet8f psub(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d psub(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet8i psub(const Packet8i& a, const Packet8i& b) { @@ -285,11 +544,17 @@ template<> EIGEN_STRONG_INLINE Packet8i psub(const Packet8i& a, const template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a) { - return _mm256_sub_ps(_mm256_set1_ps(0.0),a); + const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)); + return _mm256_xor_ps(a, mask); } template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a) { - return _mm256_sub_pd(_mm256_set1_pd(0.0),a); + const Packet4d mask = 
_mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000ULL)); + return _mm256_xor_pd(a, mask); +} +template<> EIGEN_STRONG_INLINE Packet8i pnegate(const Packet8i& a) +{ + return psub(pzero(a), a); } template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; } @@ -310,36 +575,58 @@ template<> EIGEN_STRONG_INLINE Packet8i pmul(const Packet8i& a, const template<> EIGEN_STRONG_INLINE Packet8f pdiv(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pdiv(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); } -template<> EIGEN_STRONG_INLINE Packet8i pdiv(const Packet8i& /*a*/, const Packet8i& /*b*/) -{ eigen_assert(false && "packet integer division are not supported by AVX"); - return pset1(0); + +template<> EIGEN_STRONG_INLINE Packet8i pdiv(const Packet8i& a, const Packet8i& b) +{ +#ifdef EIGEN_VECTORIZE_AVX512 + return _mm512_cvttpd_epi32(_mm512_div_pd(_mm512_cvtepi32_pd(a), _mm512_cvtepi32_pd(b))); +#else + Packet4i lo = pdiv(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0)); + Packet4i hi = pdiv(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1); +#endif } #ifdef EIGEN_VECTORIZE_FMA -template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) ) - // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, - // and even register spilling with clang>=6.0 (bug 1637). - // Gcc stupidly generates a vfmadd132ps instruction. - // So let's enforce it to generate a vfmadd231ps instruction since the most common use - // case is to accumulate the result of the product. 
- Packet8f res = c; - __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); - return res; -#else - return _mm256_fmadd_ps(a,b,c); -#endif +template <> +EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { + return _mm256_fmadd_ps(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) ) - // see above - Packet4d res = c; - __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); - return res; -#else - return _mm256_fmadd_pd(a,b,c); -#endif +template <> +EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { + return _mm256_fmadd_pd(a, b, c); } + +template <> +EIGEN_STRONG_INLINE Packet8f pmsub(const Packet8f& a, const Packet8f& b, const Packet8f& c) { + return _mm256_fmsub_ps(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet4d pmsub(const Packet4d& a, const Packet4d& b, const Packet4d& c) { + return _mm256_fmsub_pd(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet8f pnmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { + return _mm256_fnmadd_ps(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet4d pnmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { + return _mm256_fnmadd_pd(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet8f pnmsub(const Packet8f& a, const Packet8f& b, const Packet8f& c) { + return _mm256_fnmsub_ps(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet4d pnmsub(const Packet4d& a, const Packet4d& b, const Packet4d& c) { + return _mm256_fnmsub_pd(a, b, c); +} + #endif template<> EIGEN_STRONG_INLINE Packet8f pcmp_le(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_LE_OQ); } @@ -352,7 +639,26 @@ template<> EIGEN_STRONG_INLINE Packet4d pcmp_lt(const Packet4d& a, const Packet4 template<> EIGEN_STRONG_INLINE Packet4d pcmp_lt_or_nan(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a, b, _CMP_NGE_UQ); } template<> EIGEN_STRONG_INLINE Packet4d pcmp_eq(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_EQ_OQ); } - +template<> EIGEN_STRONG_INLINE Packet8i pcmp_le(const Packet8i& a, const Packet8i& b) { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_xor_si256(_mm256_cmpgt_epi32(a,b), _mm256_set1_epi32(-1)); +#else + __m128i lo = _mm_cmpgt_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0)); + lo = _mm_xor_si128(lo, _mm_set1_epi32(-1)); + __m128i hi = _mm_cmpgt_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1)); + hi = _mm_xor_si128(hi, _mm_set1_epi32(-1)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} +template<> EIGEN_STRONG_INLINE Packet8i pcmp_lt(const Packet8i& a, const Packet8i& b) { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_cmpgt_epi32(b,a); +#else + __m128i lo = _mm_cmpgt_epi32(_mm256_extractf128_si256(b, 0), _mm256_extractf128_si256(a, 0)); + __m128i hi = _mm_cmpgt_epi32(_mm256_extractf128_si256(b, 1), _mm256_extractf128_si256(a, 1)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} template<> EIGEN_STRONG_INLINE Packet8i pcmp_eq(const Packet8i& a, const Packet8i& b) { #ifdef EIGEN_VECTORIZE_AVX2 return _mm256_cmpeq_epi32(a,b); @@ -388,6 +694,15 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin(const Packet4d& a, const return _mm256_min_pd(b,a); #endif } +template<> EIGEN_STRONG_INLINE Packet8i 
pmin(const Packet8i& a, const Packet8i& b) { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_min_epi32(a, b); +#else + __m128i lo = _mm_min_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0)); + __m128i hi = _mm_min_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} template<> EIGEN_STRONG_INLINE Packet8f pmax(const Packet8f& a, const Packet8f& b) { #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 @@ -411,6 +726,21 @@ template<> EIGEN_STRONG_INLINE Packet4d pmax(const Packet4d& a, const return _mm256_max_pd(b,a); #endif } +template<> EIGEN_STRONG_INLINE Packet8i pmax(const Packet8i& a, const Packet8i& b) { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_max_epi32(a, b); +#else + __m128i lo = _mm_max_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0)); + __m128i hi = _mm_max_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} + +#ifdef EIGEN_VECTORIZE_AVX2 +template<> EIGEN_STRONG_INLINE Packet8i psign(const Packet8i& a) { + return _mm256_sign_epi32(_mm256_set1_epi32(1), a); +} +#endif // Add specializations for min/max with prescribed NaN progation. template<> @@ -583,11 +913,16 @@ template<> EIGEN_STRONG_INLINE Packet4d ploadu(const double* from) { E template<> EIGEN_STRONG_INLINE Packet8i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast(from)); } template<> EIGEN_STRONG_INLINE Packet8f ploadu(const float* from, uint8_t umask) { +#ifdef EIGEN_VECTORIZE_AVX512 + __mmask16 mask = static_cast<__mmask16>(umask & 0x00FF); + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_castps512_ps256(_mm512_maskz_loadu_ps(mask, from)); +#else Packet8i mask = _mm256_set1_epi8(static_cast(umask)); const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe); mask = por(mask, bit_mask); mask = pcmp_eq(mask, _mm256_set1_epi32(0xffffffff)); EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskload_ps(from, mask); +#endif } // Loads 4 floats from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, a3} @@ -605,12 +940,26 @@ template<> EIGEN_STRONG_INLINE Packet8f ploaddup(const float* from) // then we can perform a consistent permutation on the global register to get everything in shape: return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2)); } -// Loads 2 doubles from memory a returns the packet {a0, a0 a1, a1} +// Loads 2 doubles from memory a returns the packet {a0, a0, a1, a1} template<> EIGEN_STRONG_INLINE Packet4d ploaddup(const double* from) { Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from); return _mm256_permute_pd(tmp, 3<<2); } +// Loads 4 integers from memory a returns the packet {a0, a0, a1, a1, a2, a2, a3, a3} +template<> EIGEN_STRONG_INLINE Packet8i ploaddup(const int* from) +{ +#ifdef EIGEN_VECTORIZE_AVX2 + const Packet8i a = _mm256_castsi128_si256(ploadu(from)); + return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3)); +#else + __m256 tmp = _mm256_broadcast_ps((const __m128*)(const void*)from); + // mimic an "inplace" permutation of the lower 128bits using a blend + tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15); + // then we can perform a consistent permutation on the global register to get everything in shape: + return 
+#endif
+}
// Loads 2 floats from memory and returns the packet {a0, a0, a0, a0, a1, a1, a1, a1}
template<> EIGEN_STRONG_INLINE Packet8f ploadquad(const float* from)
@@ -618,6 +967,10 @@ template<> EIGEN_STRONG_INLINE Packet8f ploadquad(const float* from)
Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
}
+template<> EIGEN_STRONG_INLINE Packet8i ploadquad(const int* from)
+{
+ return _mm256_insertf128_si256(_mm256_set1_epi32(*from), _mm_set1_epi32(*(from+1)), 1);
+}
template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
@@ -628,11 +981,16 @@ template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet4d&
template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet8f& from, uint8_t umask) {
+#ifdef EIGEN_VECTORIZE_AVX512
+ __mmask16 mask = static_cast<__mmask16>(umask & 0x00FF);
+ EIGEN_DEBUG_UNALIGNED_STORE return _mm512_mask_storeu_ps(to, mask, _mm512_castps256_ps512(from));
+#else
Packet8i mask = _mm256_set1_epi8(static_cast<char>(umask));
const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
mask = por(mask, bit_mask);
mask = pcmp_eq(mask, _mm256_set1_epi32(0xffffffff));
EIGEN_DEBUG_UNALIGNED_STORE return _mm256_maskstore_ps(to, mask, from);
+#endif
}
// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
@@ -646,6 +1004,11 @@ template<> EIGEN_DEVICE_FUNC inline Packet4d pgather(const dou
{
return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
+template<> EIGEN_DEVICE_FUNC inline Packet8i pgather(const int* from, Index stride)
+{
+ return _mm256_set_epi32(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
+ from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet8f& from, Index stride)
{
@@ -670,6 +1033,20 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to,
to[stride*2] = _mm_cvtsd_f64(high);
to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
}
+template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet8i& from, Index stride)
+{
+ __m128i low = _mm256_extractf128_si256(from, 0);
+ to[stride*0] = _mm_extract_epi32(low, 0);
+ to[stride*1] = _mm_extract_epi32(low, 1);
+ to[stride*2] = _mm_extract_epi32(low, 2);
+ to[stride*3] = _mm_extract_epi32(low, 3);
+
+ __m128i high = _mm256_extractf128_si256(from, 1);
+ to[stride*4] = _mm_extract_epi32(high, 0);
+ to[stride*5] = _mm_extract_epi32(high, 1);
+ to[stride*6] = _mm_extract_epi32(high, 2);
+ to[stride*7] = _mm_extract_epi32(high, 3);
+}
template<> EIGEN_STRONG_INLINE void pstore1(float* to, const float& a)
{
@@ -720,6 +1097,17 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
return _mm256_permute_pd(swap_halves,5);
#endif
}
+template<> EIGEN_STRONG_INLINE Packet8i preverse(const Packet8i& a)
+{
+ return _mm256_castps_si256(preverse(_mm256_castsi256_ps(a)));
+}
+
+#ifdef EIGEN_VECTORIZE_AVX2
+template<>
EIGEN_STRONG_INLINE Packet4l preverse(const Packet4l& a) +{ + return _mm256_castpd_si256(preverse(_mm256_castsi256_pd(a))); +} +#endif // pabs should be ok template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a) @@ -732,6 +1120,23 @@ template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a) const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF)); return _mm256_and_pd(a,mask); } +template<> EIGEN_STRONG_INLINE Packet8i pabs(const Packet8i& a) +{ +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_abs_epi32(a); +#else + __m128i lo = _mm_abs_epi32(_mm256_extractf128_si256(a, 0)); + __m128i hi = _mm_abs_epi32(_mm256_extractf128_si256(a, 1)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} + +template<> EIGEN_STRONG_INLINE Packet8h psignbit(const Packet8h& a) { return _mm_srai_epi16(a, 15); } +template<> EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) { return _mm_srai_epi16(a, 15); } +template<> EIGEN_STRONG_INLINE Packet8f psignbit(const Packet8f& a) { return _mm256_castsi256_ps(parithmetic_shift_right<31>((Packet8i)_mm256_castps_si256(a))); } +#ifdef EIGEN_VECTORIZE_AVX2 +template<> EIGEN_STRONG_INLINE Packet4d psignbit(const Packet4d& a) { return _mm256_castsi256_pd(parithmetic_shift_right<63>((Packet4l)_mm256_castpd_si256(a))); } +#endif template<> EIGEN_STRONG_INLINE Packet8f pfrexp(const Packet8f& a, Packet8f& exponent) { return pfrexp_generic(a,exponent); @@ -803,11 +1208,19 @@ template<> EIGEN_STRONG_INLINE double predux(const Packet4d& a) { return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1)))); } +template<> EIGEN_STRONG_INLINE int predux(const Packet8i& a) +{ + return predux(Packet4i(_mm_add_epi32(_mm256_castsi256_si128(a),_mm256_extractf128_si256(a,1)))); +} template<> EIGEN_STRONG_INLINE Packet4f predux_half_dowto4(const Packet8f& a) { return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1)); } +template<> EIGEN_STRONG_INLINE Packet4i predux_half_dowto4(const Packet8i& a) +{ + return _mm_add_epi32(_mm256_castsi256_si128(a),_mm256_extractf128_si256(a,1)); +} template<> EIGEN_STRONG_INLINE float predux_mul(const Packet8f& a) { @@ -856,7 +1269,12 @@ template<> EIGEN_STRONG_INLINE double predux_max(const Packet4d& a) template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x) { - return _mm256_movemask_ps(x)!=0; + return _mm256_movemask_ps(x) != 0; +} + +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8i& x) +{ + return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0; } EIGEN_DEVICE_FUNC inline void @@ -905,6 +1323,66 @@ ptranspose(PacketBlock& kernel) { kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31); } +#define MM256_SHUFFLE_EPI32(A, B, M) \ + _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B), M)) + +#ifndef EIGEN_VECTORIZE_AVX2 +#define MM256_UNPACKLO_EPI32(A, B) \ + _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B))) +#define MM256_UNPACKHI_EPI32(A, B) \ + _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B))) +#else +#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_epi32(A, B) +#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_epi32(A, B) +#endif + + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock& kernel) { + __m256i T0 = MM256_UNPACKLO_EPI32(kernel.packet[0], kernel.packet[1]); + __m256i T1 = MM256_UNPACKHI_EPI32(kernel.packet[0], kernel.packet[1]); 
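+ // The same lo/hi 32-bit interleave is applied to the remaining three row
+ // pairs; the 4x4 shuffles and the cross-lane permutes below then finish
+ // the 8x8 transpose.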
+ __m256i T2 = MM256_UNPACKLO_EPI32(kernel.packet[2], kernel.packet[3]); + __m256i T3 = MM256_UNPACKHI_EPI32(kernel.packet[2], kernel.packet[3]); + __m256i T4 = MM256_UNPACKLO_EPI32(kernel.packet[4], kernel.packet[5]); + __m256i T5 = MM256_UNPACKHI_EPI32(kernel.packet[4], kernel.packet[5]); + __m256i T6 = MM256_UNPACKLO_EPI32(kernel.packet[6], kernel.packet[7]); + __m256i T7 = MM256_UNPACKHI_EPI32(kernel.packet[6], kernel.packet[7]); + __m256i S0 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(1,0,1,0)); + __m256i S1 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(3,2,3,2)); + __m256i S2 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(1,0,1,0)); + __m256i S3 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(3,2,3,2)); + __m256i S4 = MM256_SHUFFLE_EPI32(T4,T6,_MM_SHUFFLE(1,0,1,0)); + __m256i S5 = MM256_SHUFFLE_EPI32(T4,T6,_MM_SHUFFLE(3,2,3,2)); + __m256i S6 = MM256_SHUFFLE_EPI32(T5,T7,_MM_SHUFFLE(1,0,1,0)); + __m256i S7 = MM256_SHUFFLE_EPI32(T5,T7,_MM_SHUFFLE(3,2,3,2)); + kernel.packet[0] = _mm256_permute2f128_si256(S0, S4, 0x20); + kernel.packet[1] = _mm256_permute2f128_si256(S1, S5, 0x20); + kernel.packet[2] = _mm256_permute2f128_si256(S2, S6, 0x20); + kernel.packet[3] = _mm256_permute2f128_si256(S3, S7, 0x20); + kernel.packet[4] = _mm256_permute2f128_si256(S0, S4, 0x31); + kernel.packet[5] = _mm256_permute2f128_si256(S1, S5, 0x31); + kernel.packet[6] = _mm256_permute2f128_si256(S2, S6, 0x31); + kernel.packet[7] = _mm256_permute2f128_si256(S3, S7, 0x31); +} + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock& kernel) { + __m256i T0 = MM256_UNPACKLO_EPI32(kernel.packet[0], kernel.packet[1]); + __m256i T1 = MM256_UNPACKHI_EPI32(kernel.packet[0], kernel.packet[1]); + __m256i T2 = MM256_UNPACKLO_EPI32(kernel.packet[2], kernel.packet[3]); + __m256i T3 = MM256_UNPACKHI_EPI32(kernel.packet[2], kernel.packet[3]); + + __m256i S0 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(1,0,1,0)); + __m256i S1 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(3,2,3,2)); + __m256i S2 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(1,0,1,0)); + __m256i S3 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(3,2,3,2)); + + kernel.packet[0] = _mm256_permute2f128_si256(S0, S1, 0x20); + kernel.packet[1] = _mm256_permute2f128_si256(S2, S3, 0x20); + kernel.packet[2] = _mm256_permute2f128_si256(S0, S1, 0x31); + kernel.packet[3] = _mm256_permute2f128_si256(S2, S3, 0x31); +} + EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15); @@ -919,21 +1397,37 @@ ptranspose(PacketBlock& kernel) { } template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) { +#ifdef EIGEN_VECTORIZE_AVX2 + const __m256i zero = _mm256_setzero_si256(); + const __m256i select = _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); + __m256i false_mask = _mm256_cmpeq_epi32(zero, select); + return _mm256_blendv_ps(thenPacket, elsePacket, _mm256_castsi256_ps(false_mask)); +#else const __m256 zero = _mm256_setzero_ps(); const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ); return _mm256_blendv_ps(thenPacket, elsePacket, false_mask); +#endif } + template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, 
const Packet4d& elsePacket) { +#ifdef EIGEN_VECTORIZE_AVX2 + const __m256i zero = _mm256_setzero_si256(); + const __m256i select = _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); + __m256i false_mask = _mm256_cmpeq_epi64(select, zero); + return _mm256_blendv_pd(thenPacket, elsePacket, _mm256_castsi256_pd(false_mask)); +#else const __m256d zero = _mm256_setzero_pd(); const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ); return _mm256_blendv_pd(thenPacket, elsePacket, false_mask); +#endif } // Packet math for Eigen::half - +#ifndef EIGEN_VECTORIZE_AVX512FP16 template<> struct unpacket_traits { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; }; +#endif template<> EIGEN_STRONG_INLINE Packet8h pset1(const Eigen::half& from) { return _mm_set1_epi16(numext::bit_cast(from)); @@ -989,18 +1483,9 @@ EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h& a) { #ifdef EIGEN_HAS_FP16_C return _mm256_cvtph_ps(a); #else - EIGEN_ALIGN32 Eigen::half aux[8]; - pstore(aux, a); - float f0(aux[0]); - float f1(aux[1]); - float f2(aux[2]); - float f3(aux[3]); - float f4(aux[4]); - float f5(aux[5]); - float f6(aux[6]); - float f7(aux[7]); - - return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0); + Eigen::internal::Packet8f pp = _mm256_castsi256_ps(_mm256_insertf128_si256( + _mm256_castsi128_si256(half2floatsse(a)), half2floatsse(_mm_srli_si128(a, 8)), 1)); + return pp; #endif } @@ -1008,17 +1493,9 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) { #ifdef EIGEN_HAS_FP16_C return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC); #else - EIGEN_ALIGN32 float aux[8]; - pstore(aux, a); - const numext::uint16_t s0 = numext::bit_cast(Eigen::half(aux[0])); - const numext::uint16_t s1 = numext::bit_cast(Eigen::half(aux[1])); - const numext::uint16_t s2 = numext::bit_cast(Eigen::half(aux[2])); - const numext::uint16_t s3 = numext::bit_cast(Eigen::half(aux[3])); - const numext::uint16_t s4 = numext::bit_cast(Eigen::half(aux[4])); - const numext::uint16_t s5 = numext::bit_cast(Eigen::half(aux[5])); - const numext::uint16_t s6 = numext::bit_cast(Eigen::half(aux[6])); - const numext::uint16_t s7 = numext::bit_cast(Eigen::half(aux[7])); - return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0); + __m128i lo = float2half(_mm256_extractf128_ps(a, 0)); + __m128i hi = float2half(_mm256_extractf128_ps(a, 1)); + return _mm_packus_epi32(lo, hi); #endif } @@ -1097,6 +1574,7 @@ template<> EIGEN_STRONG_INLINE Packet8h pnegate(const Packet8h& a) { return _mm_xor_si128(a, sign_mask); } +#ifndef EIGEN_VECTORIZE_AVX512FP16 template<> EIGEN_STRONG_INLINE Packet8h padd(const Packet8h& a, const Packet8h& b) { Packet8f af = half2float(a); Packet8f bf = half2float(b); @@ -1124,6 +1602,7 @@ template<> EIGEN_STRONG_INLINE Packet8h pdiv(const Packet8h& a, const Packet8f rf = pdiv(af, bf); return float2half(rf); } +#endif template<> EIGEN_STRONG_INLINE Packet8h pgather(const Eigen::half* from, Index stride) { @@ -1152,11 +1631,14 @@ template<> EIGEN_STRONG_INLINE void pscatter(Eigen::half* to[stride*7] = aux[7]; } + +#ifndef EIGEN_VECTORIZE_AVX512FP16 template<> EIGEN_STRONG_INLINE Eigen::half predux(const Packet8h& a) { Packet8f af = half2float(a); float reduced = predux(af); return Eigen::half(reduced); } +#endif template<> 
EIGEN_STRONG_INLINE Eigen::half predux_max(const Packet8h& a) { Packet8f af = half2float(a); @@ -1272,7 +1754,6 @@ EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf& a) { // Convert float to bfloat16 according to round-to-nearest-even/denormals algorithm. EIGEN_STRONG_INLINE Packet8bf F32ToBf16(const Packet8f& a) { - Packet8bf r; __m256i input = _mm256_castps_si256(a); diff --git a/libs/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h index d507fb6..320479b 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TYPE_CASTING_AVX_H #define EIGEN_TYPE_CASTING_AVX_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/Complex.h b/libs/eigen/Eigen/src/Core/arch/AVX512/Complex.h index 49c72b3..6d8ee2b 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX512/Complex.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX512/Complex.h @@ -10,6 +10,8 @@ #ifndef EIGEN_COMPLEX_AVX512_H #define EIGEN_COMPLEX_AVX512_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -97,7 +99,9 @@ template<> EIGEN_STRONG_INLINE Packet8cf ploadu(const std::complex EIGEN_STRONG_INLINE Packet8cf pset1(const std::complex& from) { - return Packet8cf(_mm512_castpd_ps(pload1((const double*)(const void*)&from))); + const float re = std::real(from); + const float im = std::imag(from); + return Packet8cf(_mm512_set_ps(im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re)); } template<> EIGEN_STRONG_INLINE Packet8cf ploaddup(const std::complex* from) @@ -157,11 +161,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf,Packet16f) template<> EIGEN_STRONG_INLINE Packet8cf pdiv(const Packet8cf& a, const Packet8cf& b) { - Packet8cf num = pmul(a, pconj(b)); - __m512 tmp = _mm512_mul_ps(b.v, b.v); - __m512 tmp2 = _mm512_shuffle_ps(tmp,tmp,0xB1); - __m512 denom = _mm512_add_ps(tmp, tmp2); - return Packet8cf(_mm512_div_ps(num.v, denom)); + return pdiv_complex(a, b); } template<> EIGEN_STRONG_INLINE Packet8cf pcplxflip(const Packet8cf& x) @@ -253,11 +253,7 @@ template<> EIGEN_STRONG_INLINE Packet4cd ploadu(const std::complex EIGEN_STRONG_INLINE Packet4cd pset1(const std::complex& from) { - #ifdef EIGEN_VECTORIZE_AVX512DQ - return Packet4cd(_mm512_broadcast_f64x2(pset1(from).v)); - #else return Packet4cd(_mm512_castps_pd(_mm512_broadcast_f32x4( _mm_castpd_ps(pset1(from).v)))); - #endif } template<> EIGEN_STRONG_INLINE Packet4cd ploaddup(const std::complex* from) { @@ -309,47 +305,11 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cd(_mm512_extractf64x4_pd(a.v,1)))); } -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet4cd 
pmul(const Packet4cd& a, const Packet4cd& b) const
- {
- return pconj(internal::pmul(a, b));
- }
-};
-
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cd,Packet8d)
template<> EIGEN_STRONG_INLINE Packet4cd pdiv(const Packet4cd& a, const Packet4cd& b)
{
- Packet4cd num = pmul(a, pconj(b));
- __m512d tmp = _mm512_mul_pd(b.v, b.v);
- __m512d denom = padd(_mm512_permute_pd(tmp,0x55), tmp);
- return Packet4cd(_mm512_div_pd(num.v, denom));
+ return pdiv_complex(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4cd pcplxflip(const Packet4cd& x)
diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h b/libs/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h
new file mode 100644
index 0000000..cb7cfdf
--- /dev/null
+++ b/libs/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h
@@ -0,0 +1,1235 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2022 Intel Corporation
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CORE_ARCH_AVX512_GEMM_KERNEL_H
+#define EIGEN_CORE_ARCH_AVX512_GEMM_KERNEL_H
+
+#if EIGEN_COMP_MSVC
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#include <immintrin.h>
+#include <type_traits>
+
+#include "../../InternalHeaderCheck.h"
+
+#if !defined(EIGEN_USE_AVX512_GEMM_KERNELS)
+// Disable new AVX512 kernels by default.
+#define EIGEN_USE_AVX512_GEMM_KERNELS 0
+#endif
+
+#define SECOND_FETCH (32)
+#if (EIGEN_COMP_GNUC_STRICT != 0) && !defined(EIGEN_ARCH_AVX512_GEMM_KERNEL_USE_LESS_A_REGS)
+// Use fewer registers to load A elements, to work around compiler spills. Lose a
+// bit of performance (less than ~2%).
+#define EIGEN_ARCH_AVX512_GEMM_KERNEL_USE_LESS_A_REGS
+#endif
+
+namespace Eigen {
+namespace internal {
+
+template <typename Scalar, bool is_unit_inc>
+class gemm_class {
+ using vec = typename packet_traits<Scalar>::type;
+ using vec_ymm = typename unpacket_traits<vec>::half;
+ using vec_xmm = typename unpacket_traits<vec_ymm>::half;
+ using umask_t = typename unpacket_traits<vec>::mask_t;
+
+ static constexpr bool is_f32 = sizeof(Scalar) == sizeof(float);
+ static constexpr bool is_f64 = sizeof(Scalar) == sizeof(double);
+
+#ifndef EIGEN_ARCH_AVX512_GEMM_KERNEL_USE_LESS_A_REGS
+ static constexpr bool use_less_a_regs = !is_unit_inc;
+#else
+ static constexpr bool use_less_a_regs = true;
+#endif
+#ifndef EIGEN_ARCH_AVX512_GEMM_KERNEL_USE_LESS_B_REGS
+ static constexpr bool use_less_b_regs = !is_unit_inc;
+#else
+ static constexpr bool use_less_b_regs = true;
+#endif
+
+ static constexpr int a_regs[] = {0, 1, 2, use_less_a_regs ? 0 : 3, use_less_a_regs ? 1 : 4, use_less_a_regs ? 2 : 5};
+ static constexpr int b_regs[] = {6, use_less_b_regs ? 6 : 7};
+ static constexpr int c_regs[] = {
+ 8, 16, 24, 9, 17, 25, 10, 18, 26, 11, 19, 27, 12, 20, 28, 13, 21, 29, 14, 22, 30, 15, 23, 31,
+ };
+
+ static constexpr int alpha_load_reg = 0;
+ static constexpr int c_load_regs[] = {1, 2, 6};
+
+ static constexpr int a_shift = 128;
+ static constexpr int b_shift = 128;
+
+ static constexpr int nelems_in_cache_line = is_f32 ? 16 : 8;
+ static constexpr int a_prefetch_size = nelems_in_cache_line * 2;
+ static constexpr int b_prefetch_size = nelems_in_cache_line * 8;
+
+ vec zmm[32];
+ umask_t mask;
+
+ // gemm arguments.
+ Index m; + const Index n, k, ldc; + const Index inc; + const Scalar *alpha; + + const Scalar *a, *b; + Scalar *c; + + const bool is_alpha1; + const bool is_beta0; + + const Index a_stride, b_stride; + const Index a_off, b_off; + + static EIGEN_ALWAYS_INLINE constexpr int div_up(int a, int b) { return a == 0 ? 0 : (a - 1) / b + 1; } + + EIGEN_ALWAYS_INLINE void prefetch_a(const Scalar *a_addr) { + _mm_prefetch((char *)(a_prefetch_size + a_addr - a_shift), _MM_HINT_T0); + } + + EIGEN_ALWAYS_INLINE void prefetch_b(const Scalar *b_addr) { + _mm_prefetch((char *)(b_prefetch_size + b_addr - b_shift), _MM_HINT_T0); + } + + EIGEN_ALWAYS_INLINE void prefetch_x(const Scalar *x_addr) { _mm_prefetch((char *)(x_addr - a_shift), _MM_HINT_T2); } + + EIGEN_ALWAYS_INLINE void prefetch_c(const Scalar *c_addr) { +#if defined(__PRFCHW__) && __PRFCHW__ == 1 + _m_prefetchw((void *)c_addr); +#else + _mm_prefetch((char *)c_addr, _MM_HINT_T0); +#endif + } + + template + EIGEN_ALWAYS_INLINE void a_load(vec &a_reg, const Scalar *a_addr) { + switch (nelems * sizeof(*a_addr) * 8) { + default: + case 512 * 3: + a_reg = ploadu(a_addr); + break; + case 512 * 2: + a_reg = ploadu(a_addr); + break; + case 512 * 1: + a_reg = ploadu(a_addr); + break; + case 256 * 1: + a_reg = preinterpret(_mm512_broadcast_f64x4(ploadu(reinterpret_cast(a_addr)))); + break; + case 128 * 1: + a_reg = preinterpret(_mm512_broadcast_f32x4(ploadu(reinterpret_cast(a_addr)))); + break; + case 64 * 1: + a_reg = preinterpret(pload1(reinterpret_cast(a_addr))); + break; + case 32 * 1: + a_reg = pload1(a_addr); + break; + } + } + + EIGEN_ALWAYS_INLINE void b_load(vec &b_reg, const Scalar *b_addr) { b_reg = pload1(b_addr); } + + template + EIGEN_ALWAYS_INLINE void c_store(Scalar *mem, vec &src) { + if (is_unit_inc) { + switch (nelems * sizeof(*mem) * 8) { + default: + case 512 * 3: + pstoreu(mem, src); + break; + case 512 * 2: + pstoreu(mem, src); + break; + case 512 * 1: + pstoreu(mem, src); + break; + case 256 * 1: + pstoreu(mem, preinterpret(src)); + break; + case 128 * 1: + pstoreu(mem, preinterpret(src)); + break; + case 64 * 1: + pstorel(mem, preinterpret(src)); + break; + case 32 * 1: + pstores(mem, preinterpret(src)); + break; + } + } else { + switch (nelems * sizeof(*mem) * 8) { + default: + case 512 * 3: + pscatter(mem, src, inc); + break; + case 512 * 2: + pscatter(mem, src, inc); + break; + case 512 * 1: + pscatter(mem, src, inc); + break; + case 256 * 1: + pscatter(mem, src, inc, mask); + break; + case 128 * 1: + pscatter(mem, src, inc, mask); + break; + case 64 * 1: + pscatter(mem, src, inc, mask); + break; + case 32 * 1: + pscatter(mem, src, inc, mask); + break; + } + } + } + + template + EIGEN_ALWAYS_INLINE void vaddm(vec &dst, const Scalar *mem, vec &src, vec ®) { + if (is_unit_inc) { + switch (nelems * sizeof(*mem) * 8) { + default: + case 512 * 3: + dst = padd(src, ploadu(mem)); + break; + case 512 * 2: + dst = padd(src, ploadu(mem)); + break; + case 512 * 1: + dst = padd(src, ploadu(mem)); + break; + case 256 * 1: + dst = preinterpret(padd(preinterpret(src), ploadu(mem))); + break; + case 128 * 1: + dst = preinterpret(padd(preinterpret(src), ploadu(mem))); + break; + case 64 * 1: + dst = preinterpret(padd(preinterpret(src), ploadl(mem))); + break; + case 32 * 1: + dst = preinterpret(padds(preinterpret(src), ploads(mem))); + break; + } + } else { + // Zero out scratch register + reg = pzero(reg); + + switch (nelems * sizeof(*mem) * 8) { + default: + case 512 * 3: + reg = pgather(mem, inc); + dst = padd(src, reg); + break; + case 512 * 
2: + reg = pgather(mem, inc); + dst = padd(src, reg); + break; + case 512 * 1: + reg = pgather(mem, inc); + dst = padd(src, reg); + break; + case 256 * 1: + reg = preinterpret(pgather(mem, inc)); + dst = preinterpret(padd(preinterpret(src), preinterpret(reg))); + break; + case 128 * 1: + reg = preinterpret(pgather(mem, inc)); + dst = preinterpret(padd(preinterpret(src), preinterpret(reg))); + break; + case 64 * 1: + if (is_f32) { + reg = pgather(reg, mem, inc, mask); + dst = preinterpret(padd(preinterpret(src), preinterpret(reg))); + } else { + dst = preinterpret(padd(preinterpret(src), ploadl(mem))); + } + break; + case 32 * 1: + dst = preinterpret(padds(preinterpret(src), ploads(mem))); + break; + } + } + } + + EIGEN_STRONG_INLINE void vfmadd(vec &dst, const vec &src1, const vec &src2) { + dst = pmadd(src1, src2, dst); + +#if (EIGEN_COMP_GNUC != 0) || (EIGEN_COMP_CLANG != 0) + // Workaround register spills for gcc and clang + __asm__("#" : [dst] "+v"(dst) : [src1] "%v"(src1), [src2] "v"(src2)); +#endif + } + + template + EIGEN_ALWAYS_INLINE void vfmaddm(vec &dst, const Scalar *mem, vec &src, vec &scale, vec ®) { + if (is_unit_inc) { + switch (nelems * sizeof(*mem) * 8) { + default: + case 512 * 3: + dst = pmadd(scale, src, ploadu(mem)); + break; + case 512 * 2: + dst = pmadd(scale, src, ploadu(mem)); + break; + case 512 * 1: + dst = pmadd(scale, src, ploadu(mem)); + break; + case 256 * 1: + dst = + preinterpret(pmadd(preinterpret(scale), preinterpret(src), ploadu(mem))); + break; + case 128 * 1: + dst = + preinterpret(pmadd(preinterpret(scale), preinterpret(src), ploadu(mem))); + break; + case 64 * 1: + dst = + preinterpret(pmadd(preinterpret(scale), preinterpret(src), ploadl(mem))); + break; + case 32 * 1: + dst = + preinterpret(pmadds(preinterpret(scale), preinterpret(src), ploads(mem))); + break; + } + } else { + // Zero out scratch register + reg = pzero(reg); + + switch (nelems * sizeof(*mem) * 8) { + default: + case 512 * 3: + reg = pgather(mem, inc); + dst = pmadd(scale, src, reg); + break; + case 512 * 2: + reg = pgather(mem, inc); + dst = pmadd(scale, src, reg); + break; + case 512 * 1: + reg = pgather(mem, inc); + dst = pmadd(scale, src, reg); + break; + case 256 * 1: + reg = preinterpret(pgather(mem, inc)); + dst = preinterpret( + pmadd(preinterpret(scale), preinterpret(src), preinterpret(reg))); + break; + case 128 * 1: + reg = preinterpret(pgather(mem, inc)); + dst = preinterpret( + pmadd(preinterpret(scale), preinterpret(src), preinterpret(reg))); + break; + case 64 * 1: + if (is_f32) { + reg = pgather(reg, mem, inc, mask); + dst = preinterpret( + pmadd(preinterpret(scale), preinterpret(src), preinterpret(reg))); + } else { + dst = preinterpret( + pmadd(preinterpret(scale), preinterpret(src), ploadl(mem))); + } + break; + case 32 * 1: + dst = + preinterpret(pmadds(preinterpret(scale), preinterpret(src), ploads(mem))); + break; + } + } + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(j > endX) || (i > endY)> a_loads(const Scalar *ao) { + EIGEN_UNUSED_VARIABLE(ao); + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(j <= endX) && (i <= endY)> a_loads(const Scalar *ao) { + if (j < endX) { + if (i < endY) { + auto &a_reg = zmm[a_regs[i + (j % 2) * 3]]; + const Scalar *a_addr = ao + nelems * j + nelems_in_cache_line * i - a_shift; + a_load(a_reg, a_addr); + + a_loads(ao); + } else { + a_loads(ao); + } + } + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(un > max_b_unroll) || (i > um_vecs)> prefetch_cs(const Scalar *co1, + const Scalar *co2) { + 
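+ // Base case of the compile-time recursion: indices past the unroll bounds
+ // have nothing left to prefetch.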
EIGEN_UNUSED_VARIABLE(co1); + EIGEN_UNUSED_VARIABLE(co2); + } + + /* C prefetch loop structure. + * for (int un = 0; un < 8; un++) { + * if (b_unroll >= un + 1) { + * if (un == 4) co2 = co1 + 4 * ldc; + * + * for (int i = 0; i < um_vecs; i++) { + * Scalar *co = (un + 1 <= 4) ? co1 : co2; + * auto co_off = (un % 4) * ldc + a_unroll - 1 + i * nelems_in_cache_line * sizeof *co; + * prefetch_c(co + co_off); + * } + * } + * } + */ + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(un <= max_b_unroll) && (i <= um_vecs)> prefetch_cs(Scalar *&co1, Scalar *&co2) { + if (un < max_b_unroll) { + if (b_unroll >= un + 1) { + if (un == 4 && i == 0) co2 = co1 + 4 * ldc; + + if (i < um_vecs) { + Scalar *co = (un + 1 <= 4) ? co1 : co2; + auto co_off = (un % 4) * ldc + a_unroll - 1 + i * nelems_in_cache_line * sizeof *co; + prefetch_c(co + co_off); + + prefetch_cs(co1, co2); + } else { + prefetch_cs(co1, co2); + } + + } else { + prefetch_cs(co1, co2); + } + } + } + + // load_c + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(i > um_vecs)> scale_load_c(const Scalar *cox, vec &alpha_reg) { + EIGEN_UNUSED_VARIABLE(cox); + EIGEN_UNUSED_VARIABLE(alpha_reg); + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(i <= um_vecs)> scale_load_c(const Scalar *cox, vec &alpha_reg) { + if (i < um_vecs) { + auto &c_reg = zmm[c_regs[i + idx * 3]]; + auto &c_load_reg = zmm[c_load_regs[i % 3]]; + auto c_mem = cox; + if (is_unit_inc) + c_mem += i * nelems_in_cache_line; + else + c_mem += i * nelems_in_cache_line * inc; + + if (!is_beta0 && is_alpha1) + vaddm(c_reg, c_mem, c_reg, c_load_reg); + else if (!is_beta0 && !is_alpha1) + vfmaddm(c_reg, c_mem, c_reg, alpha_reg, c_load_reg); + else if (is_beta0 && !is_alpha1) + c_reg = pmul(alpha_reg, c_reg); + + scale_load_c(cox, alpha_reg); + } + } + + // store_c + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(i > um_vecs)> write_c(Scalar *cox) { + EIGEN_UNUSED_VARIABLE(cox); + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(i <= um_vecs)> write_c(Scalar *cox) { + if (i < um_vecs) { + auto &c_reg = zmm[c_regs[i + idx * 3]]; + auto c_mem = cox; + if (is_unit_inc) + c_mem += i * nelems_in_cache_line; + else + c_mem += i * nelems_in_cache_line * inc; + + c_store(c_mem, c_reg); + c_reg = pzero(c_reg); + + write_c(cox); + } + } + + /* C update loop structure. + * co2 = co1 + ldc; + * + * auto &alpha_reg = zmm[alpha_load_reg]; + * if (!is_alpha1) alpha_reg = pload1(alpha); + * + * int idx = 0; + * for (pow = 1; pow <= 8; pow <<= 1) { + * + * if (b_unroll >= pow) { + * for (count = 1; count < (pow + 1) / 2 + 1; count++) { + * if (pow >= 4) co2 += ldc; + * + * const Scalar *cox = (idx == 0) ? co1 : co2; + * + * const int um_vecs = div_up(a_unroll, nelems_in_cache_line); + * scale_load_c<0, um_vecs, idx, a_unroll>(cox, alpha_reg); + * write_c<0, um_vecs, idx, a_unroll>(cox); + * + * idx++; + * } + * } + * } + * + * if (b_unroll == 1) + * co1 += ldc; + * else + * co1 = co2 + ldc; + */ + + template + EIGEN_ALWAYS_INLINE void c_update_1count(Scalar *&cox) { + if (pow >= 4) cox += ldc; + + const int um_vecs = div_up(a_unroll, nelems_in_cache_line); + auto &alpha_reg = zmm[alpha_load_reg]; + + scale_load_c<0, um_vecs, idx, a_unroll>(cox, alpha_reg); + write_c<0, um_vecs, idx, a_unroll>(cox); + } + + template + EIGEN_ALWAYS_INLINE void c_update_1pow(Scalar *&co1, Scalar *&co2) { + constexpr int idx = pow / 2; + Scalar *&cox = idx == 0 ? 
co1 : co2; + + constexpr int max_count = (pow + 1) / 2; + static_assert(max_count <= 4, "Unsupported max_count."); + + if (1 <= max_count) c_update_1count(cox); + if (2 <= max_count) c_update_1count(cox); + if (3 <= max_count) c_update_1count(cox); + if (4 <= max_count) c_update_1count(cox); + } + + template + EIGEN_ALWAYS_INLINE void c_update(Scalar *&co1, Scalar *&co2) { + auto &alpha_reg = zmm[alpha_load_reg]; + + co2 = co1 + ldc; + if (!is_alpha1) alpha_reg = pload1(alpha); + if (!is_unit_inc && a_unroll < nelems_in_cache_line) mask = static_cast((1ull << a_unroll) - 1); + + static_assert(max_b_unroll <= 8, "Unsupported max_b_unroll"); + + if (1 <= max_b_unroll && 1 <= b_unroll) c_update_1pow<1, a_unroll>(co1, co2); + if (2 <= max_b_unroll && 2 <= b_unroll) c_update_1pow<2, a_unroll>(co1, co2); + if (4 <= max_b_unroll && 4 <= b_unroll) c_update_1pow<4, a_unroll>(co1, co2); + if (8 <= max_b_unroll && 8 <= b_unroll) c_update_1pow<8, a_unroll>(co1, co2); + + if (b_unroll == 1) + co1 += ldc; + else + co1 = co2 + ldc; + } + + // compute + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(um > um_vecs)> compute(const Scalar *ao, const Scalar *bo, int &fetchA_idx, + int &fetchB_idx, vec &b_reg) { + EIGEN_UNUSED_VARIABLE(ao); + EIGEN_UNUSED_VARIABLE(bo); + EIGEN_UNUSED_VARIABLE(fetchA_idx); + EIGEN_UNUSED_VARIABLE(fetchB_idx); + EIGEN_UNUSED_VARIABLE(b_reg); + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(um <= um_vecs)> compute(const Scalar *ao, const Scalar *bo, int &fetchA_idx, + int &fetchB_idx, vec &b_reg) { + if (um < um_vecs) { + auto &c_reg = zmm[c_regs[um + idx * 3]]; + auto &a_reg = zmm[a_regs[um + (uk % 2) * 3]]; + + vfmadd(c_reg, a_reg, b_reg); + + if (!fetch_x && um == 0 && + (((idx == 0 || idx == 6) && (uk % 2 == 0 || is_f64 || ktail)) || + (idx == 3 && (uk % 2 == 1 || is_f64 || ktail)))) { + prefetch_a(ao + nelems_in_cache_line * fetchA_idx); + fetchA_idx++; + } + + if (um == 0 && idx == 1 && (uk % 2 == 0 || is_f64 || ktail)) { + prefetch_b(bo + nelems_in_cache_line * fetchB_idx); + fetchB_idx++; + } + + compute(ao, bo, fetchA_idx, fetchB_idx, b_reg); + } + } + + // load_a + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(um > um_vecs)> load_a(const Scalar *ao) { + EIGEN_UNUSED_VARIABLE(ao); + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(um <= um_vecs)> load_a(const Scalar *ao) { + if (um < um_vecs) { + auto &a_reg = zmm[a_regs[um + (uk % 2) * 3]]; + const Scalar *a_addr = ao + nelems * (1 + !ktail * !use_less_a_regs + uk) + nelems_in_cache_line * um - a_shift; + a_load(a_reg, a_addr); + + load_a(ao); + } + } + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(count > (pow + 1) / 2)> innerkernel_1pow(const Scalar *&aa, + const Scalar *const &ao, + const Scalar *const &bo, Scalar *&co2, + int &fetchA_idx, int &fetchB_idx) { + EIGEN_UNUSED_VARIABLE(aa); + EIGEN_UNUSED_VARIABLE(ao); + EIGEN_UNUSED_VARIABLE(bo); + EIGEN_UNUSED_VARIABLE(co2); + EIGEN_UNUSED_VARIABLE(fetchA_idx); + EIGEN_UNUSED_VARIABLE(fetchB_idx); + } + + template + EIGEN_ALWAYS_INLINE std::enable_if_t<(count <= (pow + 1) / 2)> innerkernel_1pow(const Scalar *&aa, + const Scalar *const &ao, + const Scalar *const &bo, Scalar *&co2, + int &fetchA_idx, int &fetchB_idx) { + const int idx = (pow / 2) + count; + + if (count < (pow + 1) / 2) { + auto &b_reg = zmm[b_regs[idx % 2]]; + + if (fetch_x && uk == 3 && idx == 0) prefetch_x(aa); + if (fetch_x && uk == 3 && idx == 4) aa += 8; + + if (b_unroll >= pow) { + compute<0, um_vecs, idx, uk, fetch_x, ktail>(ao, bo, fetchA_idx, fetchB_idx, b_reg); + + const 
Scalar *b_addr = bo + b_unroll * uk + idx + 1 + (b_unroll > 1) * !use_less_b_regs - b_shift; + b_load(b_reg, b_addr); + } + + // Go to the next count. + innerkernel_1pow(aa, ao, bo, co2, fetchA_idx, + fetchB_idx); + + } else { + // Maybe prefetch C data after count-loop. + if (pow == 2 && c_fetch) { + if (uk % 3 == 0 && uk > 0) { + co2 += ldc; + } else { + prefetch_c(co2 + (uk % 3) * nelems_in_cache_line); + } + } + } + } + + template + EIGEN_ALWAYS_INLINE void innerkernel_1uk(const Scalar *&aa, const Scalar *const &ao, const Scalar *const &bo, + Scalar *&co2, int &fetchA_idx, int &fetchB_idx) { + const int um_vecs = div_up(a_unroll, nelems_in_cache_line); + + if (max_b_unroll >= 1) + innerkernel_1pow(aa, ao, bo, co2, fetchA_idx, fetchB_idx); + if (max_b_unroll >= 2) + innerkernel_1pow(aa, ao, bo, co2, fetchA_idx, fetchB_idx); + if (max_b_unroll >= 4) + innerkernel_1pow(aa, ao, bo, co2, fetchA_idx, fetchB_idx); + if (max_b_unroll >= 8) + innerkernel_1pow(aa, ao, bo, co2, fetchA_idx, fetchB_idx); + + // Load A after pow-loop. + load_a<0, um_vecs, uk, a_unroll, ktail>(ao); + } + + /* Inner kernel loop structure. + * for (int uk = 0; uk < kfactor; uk++) { + * int idx = 0; + * + * for (pow = 1; pow < max_b_unroll << 1; pow <<= 1) { + * for (int count = 0; count < (pow + 1) / 2; count++) { + * auto &b_reg = zmm[b_regs[idx % 2]]; + * + * if (fetch_x && uk == 3 && idx == 0) prefetch_x(aa); + * if (fetch_x && uk == 3 && idx == 4) aa += 8; + * + * if (b_unroll >= pow) { + * compute<0, um_vecs, idx, uk, fetchx, ktail>(ao, bo, fetchA_idx, fetchB_idx, b_reg); + * + * const Scalar *b_addr = bo + b_unroll * uk + idx + 1 + (b_unroll > 1) - b_shift ; + * b_load(b_reg, b_addr); + * } + * idx++; + * } + * + * Maybe prefetch C data. + * if (pow == 2 && c_fetch) { + * if (uk % 3 == 0 && uk > 0) { + * co2 += ldc; + * } else { + * prefetch_c(co2 + (uk % 3) * nelems_in_cache_line); + * } + * } + * } + * + * Load A. + * load_a<0, um_vecs, uk, ktail, a_unroll>(ao); + * } + * + * Advance A/B pointers after uk-loop. + * ao += a_unroll * kfactor; + * bo += b_unroll * kfactor; + */ + + template + EIGEN_ALWAYS_INLINE void innerkernel(const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co2) { + int fetchA_idx = 0; + int fetchB_idx = 0; + + const bool fetch_x = k_factor == max_k_factor; + const bool ktail = k_factor == 1; + + static_assert(k_factor <= 4 && k_factor > 0, "innerkernel maximum k_factor supported is 4"); + + if (k_factor > 0) + innerkernel_1uk<0, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx, + fetchB_idx); + if (k_factor > 1) + innerkernel_1uk<1, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx, + fetchB_idx); + if (k_factor > 2) + innerkernel_1uk<2, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx, + fetchB_idx); + if (k_factor > 3) + innerkernel_1uk<3, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx, + fetchB_idx); + + // Advance A/B pointers after uk-loop. 
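+ // (Each innerkernel_1uk call above consumed one k-step, so the packed A/B
+ // panels advance by k_factor * a_unroll and k_factor * b_unroll elements.)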
+ ao += a_unroll * k_factor; + bo += b_unroll * k_factor; + } + + template + EIGEN_ALWAYS_INLINE void kloop(const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co1, Scalar *&co2) { + const int um_vecs = div_up(a_unroll, nelems_in_cache_line); + if (!use_less_a_regs) + a_loads<0, 2, 0, um_vecs, a_unroll>(ao); + else + a_loads<0, 1, 0, um_vecs, a_unroll>(ao); + + b_load(zmm[b_regs[0]], bo - b_shift + 0); + if (!use_less_b_regs) b_load(zmm[b_regs[1]], bo - b_shift + 1); + +#ifndef SECOND_FETCH + prefetch_cs<0, max_b_unroll, 0, um_vecs, a_unroll, b_unroll>(co1, co2); +#endif // SECOND_FETCH + + // Unrolling k-loop by a factor of 4. + const int max_k_factor = 4; + Index loop_count = k / max_k_factor; + + if (loop_count > 0) { +#ifdef SECOND_FETCH + loop_count -= SECOND_FETCH; +#endif + while (loop_count > 0) { + innerkernel(aa, ao, bo, co2); + loop_count--; + } +#ifdef SECOND_FETCH + co2 = co1 + nelems_in_cache_line - 1; + + loop_count += b_unroll; + while (loop_count > 0) { + innerkernel(aa, ao, bo, co2); + loop_count--; + } + + loop_count += SECOND_FETCH - b_unroll; + while (loop_count > 0) { + innerkernel(aa, ao, bo, co2); + loop_count--; + } +#endif + } + + // k-loop remainder handling. + loop_count = k % max_k_factor; + while (loop_count > 0) { + innerkernel(aa, ao, bo, co2); + loop_count--; + } + + // Update C matrix. + c_update(co1, co2); + } + + template + EIGEN_ALWAYS_INLINE void nloop(const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co1, Scalar *&co2) { + // Set A matrix pointer. + ao = a + a_off * a_unroll; + + // Set B matrix pointer if needed. + bo += b_unroll * b_off; + + kloop(aa, ao, bo, co1, co2); + + // Advance B matrix pointer if needed. + bo += b_unroll * (b_stride - k - b_off); + + // Advance prefetch A pointer. + aa += 16; + } + + template + EIGEN_ALWAYS_INLINE void mloop(const Scalar *&ao, const Scalar *&bo, Scalar *&co1, Scalar *&co2) { + // Set prefetch A pointers. + const Scalar *aa = a + a_unroll * a_stride; + + // Set C matrix pointers. + co1 = c; + if (a_unroll >= max_a_unroll) co2 = c + 2 * ldc; + if (is_unit_inc) + c += a_unroll; + else + c += a_unroll * inc; + + // Set B matrix pointer. + bo = b; + + // Main n-loop. + for (Index i = n / max_b_unroll; i > 0; i--) nloop(aa, ao, bo, co1, co2); + + // n-remainders. + if (n & 4 && max_b_unroll > 4) nloop(aa, ao, bo, co1, co2); +#if 0 + if (n & 2 && max_b_unroll > 2) nloop(aa, ao, bo, co1, co2); + if (n & 1 && max_b_unroll > 1) nloop(aa, ao, bo, co1, co2); +#else + // Copy kernels don't support tails of n = 2 for single/double precision. + // Loop over ones. + int n_rem = 2 * ((n & 2) != 0) + 1 * ((n & 1) != 0); + while (n_rem > 0) { + nloop(aa, ao, bo, co1, co2); + n_rem--; + } +#endif + + // Advance A matrix pointer. + a = ao + a_unroll * (a_stride - k - a_off); + } + + public: + // Compute kernel unrolling C matrix by max_a_unroll x max_b_unroll. + template + EIGEN_ALWAYS_INLINE void compute_kern() { + a -= -a_shift; + b -= -b_shift; + + const Scalar *ao = nullptr; + const Scalar *bo = nullptr; + Scalar *co1 = nullptr; + Scalar *co2 = nullptr; + + // Main m-loop. + for (; m >= max_a_unroll; m -= max_a_unroll) mloop(ao, bo, co1, co2); + + // m-remainders. 
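+ // Illustration: a leftover m = 13 decomposes as 8 + 4 + 1 in the checks
+ // below; the final 1 comes from mloop<1> for double, or from the ones-loop
+ // at the end for float.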
+ if (m & 32 && max_a_unroll > 32) mloop<32, max_a_unroll, max_b_unroll>(ao, bo, co1, co2); + if (m & 16 && max_a_unroll > 16) mloop<16, max_a_unroll, max_b_unroll>(ao, bo, co1, co2); + if (m & 8 && max_a_unroll > 8) mloop<8, max_a_unroll, max_b_unroll>(ao, bo, co1, co2); + if (m & 4 && max_a_unroll > 4) mloop<4, max_a_unroll, max_b_unroll>(ao, bo, co1, co2); + if (m & 2 && max_a_unroll > 2 && is_f64) mloop<2, max_a_unroll, max_b_unroll>(ao, bo, co1, co2); + if (m & 1 && max_a_unroll > 1 && is_f64) mloop<1, max_a_unroll, max_b_unroll>(ao, bo, co1, co2); + + // Copy kernels don't support tails of m = 2 for single precision. + // Loop over ones. + if (is_f32) { + int m_rem = 2 * ((m & 2) != 0) + 1 * ((m & 1) != 0); + while (m_rem > 0) { + mloop<1, max_a_unroll, max_b_unroll>(ao, bo, co1, co2); + m_rem--; + } + } + } + + gemm_class(Index m_, Index n_, Index k_, Index ldc_, Index inc_, const Scalar *alpha_, const Scalar *a_, + const Scalar *b_, Scalar *c_, bool is_alpha1_, bool is_beta0_, Index a_stride_, Index b_stride_, + Index a_off_, Index b_off_) + : m(m_), + n(n_), + k(k_), + ldc(ldc_), + inc(inc_), + alpha(alpha_), + a(a_), + b(b_), + c(c_), + is_alpha1(is_alpha1_), + is_beta0(is_beta0_), + a_stride(a_stride_), + b_stride(b_stride_), + a_off(a_off_), + b_off(b_off_) { + // Zero out all accumulation registers. + zmm[8] = pzero(zmm[8]); + zmm[9] = pzero(zmm[9]); + zmm[10] = pzero(zmm[10]); + zmm[11] = pzero(zmm[11]); + zmm[12] = pzero(zmm[12]); + zmm[13] = pzero(zmm[13]); + zmm[14] = pzero(zmm[14]); + zmm[15] = pzero(zmm[15]); + zmm[16] = pzero(zmm[16]); + zmm[17] = pzero(zmm[17]); + zmm[18] = pzero(zmm[18]); + zmm[19] = pzero(zmm[19]); + zmm[20] = pzero(zmm[20]); + zmm[21] = pzero(zmm[21]); + zmm[22] = pzero(zmm[22]); + zmm[23] = pzero(zmm[23]); + zmm[24] = pzero(zmm[24]); + zmm[25] = pzero(zmm[25]); + zmm[26] = pzero(zmm[26]); + zmm[27] = pzero(zmm[27]); + zmm[28] = pzero(zmm[28]); + zmm[29] = pzero(zmm[29]); + zmm[30] = pzero(zmm[30]); + zmm[31] = pzero(zmm[31]); + } +}; + +// Compute kernel with max unroll support of: +// Single precision: +// max_a_unroll: 48, 32, 16, 8, 4, 2, 1 +// max_b_unroll: 8, 4, 2, 1 +// Double precision: +// max_a_unroll: 24, 16, 8, 4, 2, 1 +// max_b_unroll: 8, 4, 2, 1 +template +EIGEN_DONT_INLINE void gemm_kern_avx512(Index m, Index n, Index k, Scalar *alpha, const Scalar *a, const Scalar *b, + Scalar *c, Index ldc, Index inc = 1, Index a_stride = -1, Index b_stride = -1, + Index a_off = 0, Index b_off = 0) { + if (a_stride == -1) a_stride = k; + if (b_stride == -1) b_stride = k; + + gemm_class g(m, n, k, ldc, inc, alpha, a, b, c, is_alpha1, is_beta0, a_stride, b_stride, a_off, + b_off); + g.template compute_kern(); +} + +// Template specializations of GEBP kernels with nr = 8. +#if EIGEN_USE_AVX512_GEMM_KERNELS +template +class gebp_traits + : public gebp_traits { + using Base = gebp_traits; + + public: + enum { nr = Base::Vectorizable ? 8 : 4 }; +}; + +template +class gebp_traits + : public gebp_traits { + using Base = gebp_traits; + + public: + enum { nr = Base::Vectorizable ? 
8 : 4 }; +}; + +template +struct gemm_pack_rhs { + typedef typename packet_traits::type Packet; + typedef typename DataMapper::LinearMapper LinearMapper; + enum { PacketSize = packet_traits::size }; + EIGEN_DONT_INLINE void operator()(Scalar *blockB, const DataMapper &rhs, Index depth, Index cols, Index stride = 0, + Index offset = 0); +}; + +template +EIGEN_DONT_INLINE void gemm_pack_rhs::operator()( + Scalar *blockB, const DataMapper &rhs, Index depth, Index cols, Index stride, Index offset) { + constexpr int nr = 8; + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR"); + EIGEN_UNUSED_VARIABLE(stride); + EIGEN_UNUSED_VARIABLE(offset); + eigen_assert(((!PanelMode) && stride == 0 && offset == 0) || (PanelMode && stride >= depth && offset <= stride)); + conj_if::IsComplex && Conjugate> cj; + Index packet_cols8 = nr >= 8 ? (cols / 8) * 8 : 0; + Index packet_cols4 = nr >= 4 ? (cols / 4) * 4 : 0; + Index count = 0; + const Index peeled_k = (depth / PacketSize) * PacketSize; + if (nr >= 8) { + for (Index j2 = 0; j2 < packet_cols8; j2 += 8) { + // skip what we have before + if (PanelMode) count += 8 * offset; + const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1); + const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2); + const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + const LinearMapper dm4 = rhs.getLinearMapper(0, j2 + 4); + const LinearMapper dm5 = rhs.getLinearMapper(0, j2 + 5); + const LinearMapper dm6 = rhs.getLinearMapper(0, j2 + 6); + const LinearMapper dm7 = rhs.getLinearMapper(0, j2 + 7); + Index k = 0; + if ((PacketSize % 8) == 0) // TODO enable vectorized transposition for PacketSize==4 + { + for (; k < peeled_k; k += PacketSize) { + PacketBlock kernel; + + kernel.packet[0] = dm0.template loadPacket(k); + kernel.packet[1] = dm1.template loadPacket(k); + kernel.packet[2] = dm2.template loadPacket(k); + kernel.packet[3] = dm3.template loadPacket(k); + kernel.packet[4] = dm4.template loadPacket(k); + kernel.packet[5] = dm5.template loadPacket(k); + kernel.packet[6] = dm6.template loadPacket(k); + kernel.packet[7] = dm7.template loadPacket(k); + + ptranspose(kernel); + + pstoreu(blockB + count + 0 * PacketSize, cj.pconj(kernel.packet[0])); + pstoreu(blockB + count + 1 * PacketSize, cj.pconj(kernel.packet[1 % PacketSize])); + pstoreu(blockB + count + 2 * PacketSize, cj.pconj(kernel.packet[2 % PacketSize])); + pstoreu(blockB + count + 3 * PacketSize, cj.pconj(kernel.packet[3 % PacketSize])); + pstoreu(blockB + count + 4 * PacketSize, cj.pconj(kernel.packet[4 % PacketSize])); + pstoreu(blockB + count + 5 * PacketSize, cj.pconj(kernel.packet[5 % PacketSize])); + pstoreu(blockB + count + 6 * PacketSize, cj.pconj(kernel.packet[6 % PacketSize])); + pstoreu(blockB + count + 7 * PacketSize, cj.pconj(kernel.packet[7 % PacketSize])); + count += 8 * PacketSize; + } + } + for (; k < depth; k++) { + blockB[count + 0] = cj(dm0(k)); + blockB[count + 1] = cj(dm1(k)); + blockB[count + 2] = cj(dm2(k)); + blockB[count + 3] = cj(dm3(k)); + blockB[count + 4] = cj(dm4(k)); + blockB[count + 5] = cj(dm5(k)); + blockB[count + 6] = cj(dm6(k)); + blockB[count + 7] = cj(dm7(k)); + count += 8; + } + // skip what we have after + if (PanelMode) count += 8 * (stride - offset - depth); + } + } + + if (nr >= 4) { + for (Index j2 = packet_cols8; j2 < packet_cols4; j2 += 4) { + // skip what we have before + if (PanelMode) count += 4 * offset; + const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const LinearMapper dm1 = 
rhs.getLinearMapper(0, j2 + 1); + const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2); + const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + + Index k = 0; + if ((PacketSize % 4) == 0) // TODO enable vectorized transposition for PacketSize==2 ?? + { + for (; k < peeled_k; k += PacketSize) { + PacketBlock kernel; + kernel.packet[0] = dm0.template loadPacket(k); + kernel.packet[1 % PacketSize] = dm1.template loadPacket(k); + kernel.packet[2 % PacketSize] = dm2.template loadPacket(k); + kernel.packet[3 % PacketSize] = dm3.template loadPacket(k); + ptranspose(kernel); + pstoreu(blockB + count + 0 * PacketSize, cj.pconj(kernel.packet[0])); + pstoreu(blockB + count + 1 * PacketSize, cj.pconj(kernel.packet[1 % PacketSize])); + pstoreu(blockB + count + 2 * PacketSize, cj.pconj(kernel.packet[2 % PacketSize])); + pstoreu(blockB + count + 3 * PacketSize, cj.pconj(kernel.packet[3 % PacketSize])); + count += 4 * PacketSize; + } + } + for (; k < depth; k++) { + blockB[count + 0] = cj(dm0(k)); + blockB[count + 1] = cj(dm1(k)); + blockB[count + 2] = cj(dm2(k)); + blockB[count + 3] = cj(dm3(k)); + count += 4; + } + // skip what we have after + if (PanelMode) count += 4 * (stride - offset - depth); + } + } + + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + if (PanelMode) count += offset; + const LinearMapper dm0 = rhs.getLinearMapper(0, j2); + for (Index k = 0; k < depth; k++) { + blockB[count] = cj(dm0(k)); + count += 1; + } + if (PanelMode) count += (stride - offset - depth); + } +} + +template +struct gemm_pack_rhs { + typedef typename packet_traits::type Packet; + typedef typename unpacket_traits::half HalfPacket; + typedef typename unpacket_traits::half>::half QuarterPacket; + typedef typename DataMapper::LinearMapper LinearMapper; + enum { + PacketSize = packet_traits::size, + HalfPacketSize = unpacket_traits::size, + QuarterPacketSize = unpacket_traits::size + }; + EIGEN_DONT_INLINE void operator()(Scalar *blockB, const DataMapper &rhs, Index depth, Index cols, Index stride = 0, + Index offset = 0) { + constexpr int nr = 8; + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); + EIGEN_UNUSED_VARIABLE(stride); + EIGEN_UNUSED_VARIABLE(offset); + eigen_assert(((!PanelMode) && stride == 0 && offset == 0) || (PanelMode && stride >= depth && offset <= stride)); + const bool HasHalf = (int)HalfPacketSize < (int)PacketSize; + const bool HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize; + conj_if::IsComplex && Conjugate> cj; + Index packet_cols8 = nr >= 8 ? (cols / 8) * 8 : 0; + Index packet_cols4 = nr >= 4 ? 
(cols / 4) * 4 : 0; + Index count = 0; + + if (nr >= 8) { + for (Index j2 = 0; j2 < packet_cols8; j2 += 8) { + // skip what we have before + if (PanelMode) count += 8 * offset; + for (Index k = 0; k < depth; k++) { + if (PacketSize == 8) { + // Packet A = ploadu(&rhs.data()[k*rhs.stride() + j2]); + Packet A = rhs.template loadPacket(k, j2); + pstoreu(blockB + count, cj.pconj(A)); + } else if (HasHalf && HalfPacketSize == 8) { + HalfPacket A = rhs.template loadPacket(k, j2); + pstoreu(blockB + count, cj.pconj(A)); + } else if (HasQuarter && QuarterPacketSize == 8) { + QuarterPacket A = rhs.template loadPacket(k, j2); + pstoreu(blockB + count, cj.pconj(A)); + } else if (PacketSize == 4) { + // Packet A = ploadu(&rhs.data()[k*rhs.stride() + j2]); + // Packet B = ploadu(&rhs.data()[k*rhs.stride() + j2 + PacketSize]); + Packet A = rhs.template loadPacket(k, j2); + Packet B = rhs.template loadPacket(k, j2 + PacketSize); + pstoreu(blockB + count, cj.pconj(A)); + pstoreu(blockB + count + PacketSize, cj.pconj(B)); + } else { + // const Scalar* b0 = &rhs.data()[k*rhs.stride() + j2]; + const LinearMapper dm0 = rhs.getLinearMapper(k, j2); + blockB[count + 0] = cj(dm0(0)); + blockB[count + 1] = cj(dm0(1)); + blockB[count + 2] = cj(dm0(2)); + blockB[count + 3] = cj(dm0(3)); + blockB[count + 4] = cj(dm0(4)); + blockB[count + 5] = cj(dm0(5)); + blockB[count + 6] = cj(dm0(6)); + blockB[count + 7] = cj(dm0(7)); + } + count += 8; + } + // skip what we have after + if (PanelMode) count += 8 * (stride - offset - depth); + } + } + + if (nr >= 4) { + for (Index j2 = packet_cols8; j2 < packet_cols4; j2 += 4) { + // skip what we have before + if (PanelMode) count += 4 * offset; + for (Index k = 0; k < depth; k++) { + if (PacketSize == 4) { + Packet A = rhs.template loadPacket(k, j2); + pstoreu(blockB + count, cj.pconj(A)); + count += PacketSize; + } else if (HasHalf && HalfPacketSize == 4) { + HalfPacket A = rhs.template loadPacket(k, j2); + pstoreu(blockB + count, cj.pconj(A)); + count += HalfPacketSize; + } else if (HasQuarter && QuarterPacketSize == 4) { + QuarterPacket A = rhs.template loadPacket(k, j2); + pstoreu(blockB + count, cj.pconj(A)); + count += QuarterPacketSize; + } else { + const LinearMapper dm0 = rhs.getLinearMapper(k, j2); + blockB[count + 0] = cj(dm0(0)); + blockB[count + 1] = cj(dm0(1)); + blockB[count + 2] = cj(dm0(2)); + blockB[count + 3] = cj(dm0(3)); + count += 4; + } + } + // skip what we have after + if (PanelMode) count += 4 * (stride - offset - depth); + } + } + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + if (PanelMode) count += offset; + for (Index k = 0; k < depth; k++) { + blockB[count] = cj(rhs(k, j2)); + count += 1; + } + if (PanelMode) count += stride - offset - depth; + } + } +}; + +template +struct gebp_kernel { + EIGEN_ALWAYS_INLINE + void operator()(const DataMapper &res, const Scalar *blockA, const Scalar *blockB, Index rows, Index depth, + Index cols, Scalar alpha, Index strideA = -1, Index strideB = -1, Index offsetA = 0, + Index offsetB = 0); +}; + +template +EIGEN_ALWAYS_INLINE void gebp_kernel::operator()( + const DataMapper &res, const Scalar *blockA, const Scalar *blockB, Index rows, Index depth, Index cols, + Scalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB) { + if (res.incr() == 1) { + if (alpha == 1) { + gemm_kern_avx512(rows, cols, depth, &alpha, blockA, blockB, + (Scalar *)res.data(), res.stride(), res.incr(), strideA, + strideB, offsetA, offsetB); + } else { + 
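+ // General-alpha path: the kernel applies the alpha scaling while updating C.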
gemm_kern_avx512(rows, cols, depth, &alpha, blockA, blockB, + (Scalar *)res.data(), res.stride(), res.incr(), strideA, + strideB, offsetA, offsetB); + } + } else { + if (alpha == 1) { + gemm_kern_avx512(rows, cols, depth, &alpha, blockA, blockB, + (Scalar *)res.data(), res.stride(), res.incr(), strideA, + strideB, offsetA, offsetB); + } else { + gemm_kern_avx512(rows, cols, depth, &alpha, blockA, blockB, + (Scalar *)res.data(), res.stride(), res.incr(), strideA, + strideB, offsetA, offsetB); + } + } +} +#endif // EIGEN_USE_AVX512_GEMM_KERNELS + +} // namespace internal +} // namespace Eigen + +#undef SECOND_FETCH + +#endif // EIGEN_CORE_ARCH_AVX512_GEMM_KERNEL_H diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h index 6fd726d..af47a85 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -10,39 +10,40 @@ #ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_ #define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_ +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { -// Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics. -#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 +#if EIGEN_HAS_AVX512_MATH -#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \ +#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \ const Packet16f p16f_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \ +#define EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \ const Packet16f p16f_##NAME = preinterpret(pset1(X)) -#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \ +#define EIGEN_DECLARE_CONST_Packet8d(NAME, X) \ const Packet8d p8d_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \ +#define EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \ const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X)) -#define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \ +#define EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \ const Packet16bf p16bf_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \ +#define EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \ const Packet16bf p16bf_##NAME = preinterpret(pset1(X)) template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f plog(const Packet16f& _x) { return plog_float(_x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d plog(const Packet8d& _x) { return plog_double(_x); } @@ -51,13 +52,13 @@ F16_PACKET_FUNCTION(Packet16f, Packet16h, plog) BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog) template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f plog2(const Packet16f& _x) { return plog2_float(_x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d plog2(const Packet8d& _x) { return plog2_double(_x); } @@ -69,23 +70,23 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2) // "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then // "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1). 
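// For example, x = 1.0f gives m = floor(1/log(2) + 1/2) = 1 and r = 1 - log(2) ~ 0.307,
// so exp(1) is assembled as 2^1 * exp(0.307) ~ 2.718. A plain scalar sketch of the same
// reduction (illustrative only, using <cmath>; not part of the vectorized path):
//   float m = std::floor(x * 1.44269504088896341f + 0.5f);  // x/log(2) + 1/2
//   float r = x - m * 0.6931471805599453f;                  // x - m*log(2)
//   float e = std::ldexp(std::exp(r), static_cast<int>(m)); // 2^m * exp(r)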
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
 pexp<Packet16f>(const Packet16f& _x) {
-  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
-  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
+  EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
+  EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
+  EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
 
-  _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
-  _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
+  EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
+  EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
 
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
 
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
 
   // Clamp x.
   Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
@@ -96,7 +97,7 @@ pexp<Packet16f>(const Packet16f& _x) {
 
   // Get r = x - m*ln(2). Note that we can do this without losing more than one
   // ulp precision due to the FMA instruction.
-  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
+  EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
   Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
   Packet16f r2 = pmul(r, r);
   Packet16f r3 = pmul(r2, r);
@@ -120,7 +121,7 @@ pexp<Packet16f>(const Packet16f& _x) {
 }
 
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
 pexp<Packet8d>(const Packet8d& _x) {
   return pexp_double(_x);
 }
@@ -154,49 +155,18 @@ EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exp
   return F32ToBf16(pldexp(Bf16ToF32(a), Bf16ToF32(exponent)));
 }
 
-// Functions for sqrt.
-// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
-// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
-// exact solution. The main advantage of this approach is not just speed, but
-// also the fact that it can be inlined and pipelined with other computations,
-// further reducing its effective latency.
 #if EIGEN_FAST_MATH
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
 psqrt<Packet16f>(const Packet16f& _x) {
-  Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
-  __mmask16 denormal_mask = _mm512_kand(
-      _mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
-                         _CMP_LT_OQ),
-      _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
-
-  Packet16f x = _mm512_rsqrt14_ps(_x);
-
-  // Do a single step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
-
-  // Flush results for denormals to zero.
-  return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
+  return generic_sqrt_newton_step<Packet16f>::run(_x, _mm512_rsqrt14_ps(_x));
 }
 
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
 psqrt<Packet8d>(const Packet8d& _x) {
-  Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5));
-  __mmask16 denormal_mask = _mm512_kand(
-      _mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),
-                         _CMP_LT_OQ),
-      _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
-
-  Packet8d x = _mm512_rsqrt14_pd(_x);
-
-  // Do a single step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
-
-  // Do a second step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
-
-  return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
+  // Double requires 2 Newton-Raphson steps for convergence.
+  return generic_sqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
 }
 #else
 template <>
@@ -223,40 +193,9 @@ EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
 #elif EIGEN_FAST_MATH
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
 prsqrt<Packet16f>(const Packet16f& _x) {
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
-  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
-
-  Packet16f neg_half = pmul(_x, p16f_minus_half);
-
-  // Identity infinite, negative and denormal arguments.
-  __mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);
-  __mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);
-  __mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;
-
-  // Compute an approximate result using the rsqrt intrinsic, forcing +inf
-  // for denormals for consistency with AVX and SSE implementations.
-  Packet16f y_approx = _mm512_rsqrt14_ps(_x);
-
-  // Do a single step of Newton-Raphson iteration to improve the approximation.
-  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
-  // It is essential to evaluate the inner term like this because forming
-  // y_n^2 may over- or underflow.
-  Packet16f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p16f_one_point_five));
-
-  // Select the result of the Newton-Raphson step for positive finite arguments.
-  // For other arguments, choose the output of the intrinsic. This will
-  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
-  return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
-}
-#else
-
-template <>
-EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
-  _EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
-  return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
+  return generic_rsqrt_newton_step<Packet16f, /*Steps=*/1>::run(_x, _mm512_rsqrt14_ps(_x));
 }
 #endif
@@ -266,51 +205,28 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
 
 // prsqrt for double.
 #if EIGEN_FAST_MATH
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
 prsqrt<Packet8d>(const Packet8d& _x) {
-  _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
-  _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
-  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
-
-  Packet8d neg_half = pmul(_x, p8d_minus_half);
-
-  // Identity infinite, negative and denormal arguments.
-  __mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);
-  __mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);
-  __mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;
-
-  // Compute an approximate result using the rsqrt intrinsic, forcing +inf
-  // for denormals for consistency with AVX and SSE implementations.
-#if defined(EIGEN_VECTORIZE_AVX512ER)
-  Packet8d y_approx = _mm512_rsqrt28_pd(_x);
-#else
-  Packet8d y_approx = _mm512_rsqrt14_pd(_x);
-#endif
-  // Do one or two steps of Newton-Raphson's to improve the approximation, depending on the
-  // starting accuracy (either 2^-14 or 2^-28, depending on whether AVX512ER is available).
-  // The Newton-Raphson algorithm has quadratic convergence and roughly doubles the number
-  // of correct digits for each step.
-  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
-  // It is essential to evaluate the inner term like this because forming
-  // y_n^2 may over- or underflow.
-  Packet8d y_newton = pmul(y_approx, pmadd(neg_half, pmul(y_approx, y_approx), p8d_one_point_five));
-#if !defined(EIGEN_VECTORIZE_AVX512ER)
-  y_newton = pmul(y_newton, pmadd(y_newton, pmul(neg_half, y_newton), p8d_one_point_five));
-#endif
-  // Select the result of the Newton-Raphson step for positive finite arguments.
-  // For other arguments, choose the output of the intrinsic. This will
-  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
-  return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
+  #ifdef EIGEN_VECTORIZE_AVX512ER
+  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/1>::run(_x, _mm512_rsqrt28_pd(_x));
+  #else
+  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
+  #endif
 }
+
+template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f& a) {
+#ifdef EIGEN_VECTORIZE_AVX512ER
+  return _mm512_rcp28_ps(a);
 #else
-template <>
-EIGEN_STRONG_INLINE Packet8d prsqrt<Packet8d>(const Packet8d& x) {
-  _EIGEN_DECLARE_CONST_Packet8d(one, 1.0f);
-  return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));
+  return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
+#endif
 }
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
 #endif
 
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet16f plog1p<Packet16f>(const Packet16f& _x) {
   return generic_plog1p(_x);
 }
@@ -318,7 +234,7 @@ Packet16f plog1p<Packet16f>(const Packet16f& _x) {
 F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
 BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
 
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
   return generic_expm1(_x);
 }
@@ -326,23 +242,47 @@ Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
 F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
 BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
 
-#endif
+#endif // EIGEN_HAS_AVX512_MATH
 
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
 psin<Packet16f>(const Packet16f& _x) {
   return psin_float(_x);
 }
 
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
 pcos<Packet16f>(const Packet16f& _x) {
   return pcos_float(_x);
 }
 
 template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
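All of the rewritten kernels above now funnel through the shared generic_sqrt_newton_step / generic_rsqrt_newton_step / generic_reciprocal_newton_step helpers; what varies is only the hardware seed (rsqrt14 with roughly 2^-14 relative error, or rsqrt28 under AVX512ER with roughly 2^-28) and the number of refinement steps. The underlying iterations in scalar form, as a sketch of the math rather than of Eigen's helpers:

#include <cmath>
#include <cstdio>

// One Newton-Raphson step for y ~ 1/sqrt(x):
//   y_{n+1} = y_n * (1.5 - (0.5 * x) * y_n * y_n)
// The inner term is grouped as ((0.5*x)*y)*y because forming y*y first
// could over- or underflow, as the deleted comments explained.
double rsqrt_step(double x, double y) {
  return y * std::fma((-0.5 * x) * y, y, 1.5);
}

// One step for y ~ 1/x:  y_{n+1} = y_n * (2 - x * y_n).
double recip_step(double x, double y) { return y * (2.0 - x * y); }

int main() {
  // Each step roughly doubles the correct bits, so a 2^-14 seed needs one
  // step for float and two for double; a 2^-28 seed needs at most one.
  double x = 2.0, y = 0.7;  // crude seed for 1/sqrt(2)
  for (int i = 0; i < 3; ++i) y = rsqrt_step(x, y);
  std::printf("rsqrt(2) ~ %.17g, sqrt(2) ~ %.17g\n", y, x * y);  // sqrt(x) = x * rsqrt(x)
  return 0;
}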
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f +pacos(const Packet16f& _x) { + return pacos_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f +pasin(const Packet16f& _x) { + return pasin_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f +patan(const Packet16f& _x) { + return patan_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d +patan(const Packet8d& _x) { + return patan_double(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f ptanh(const Packet16f& _x) { return internal::generic_fast_tanh_float(_x); } diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index 34d49ab..159ae3e 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PACKET_MATH_AVX512_H #define EIGEN_PACKET_MATH_AVX512_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -28,10 +30,19 @@ namespace internal { #endif #endif +// Disable the code for older versions of gcc that don't support many of the required avx512 math instrinsics. +#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 || EIGEN_COMP_ICC >= 1900 +#define EIGEN_HAS_AVX512_MATH 1 +#else +#define EIGEN_HAS_AVX512_MATH 0 +#endif + typedef __m512 Packet16f; typedef __m512i Packet16i; typedef __m512d Packet8d; +#ifndef EIGEN_VECTORIZE_AVX512FP16 typedef eigen_packet_wrapper<__m256i, 1> Packet16h; +#endif typedef eigen_packet_wrapper<__m256i, 2> Packet16bf; template <> @@ -47,6 +58,7 @@ struct is_arithmetic<__m512d> { enum { value = true }; }; +#ifndef EIGEN_VECTORIZE_AVX512FP16 template<> struct is_arithmetic { enum { value = true }; }; template <> @@ -72,12 +84,14 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasLog = 1, - HasLog1p = 1, - HasExpm1 = 1, - HasExp = 1, - HasSqrt = 1, - HasRsqrt = 1, + HasLog = EIGEN_HAS_AVX512_MATH, + HasLog1p = EIGEN_HAS_AVX512_MATH, + HasExp = EIGEN_HAS_AVX512_MATH, + HasExpm1 = EIGEN_HAS_AVX512_MATH, + HasSqrt = EIGEN_HAS_AVX512_MATH, + HasRsqrt = EIGEN_HAS_AVX512_MATH, + HasBessel = EIGEN_HAS_AVX512_MATH, + HasNdtri = EIGEN_HAS_AVX512_MATH, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, @@ -86,11 +100,10 @@ struct packet_traits : default_packet_traits { HasRound = 1, HasFloor = 1, HasCeil = 1, - HasRint = 1, - HasBessel = 1, - HasNdtri = 1 + HasRint = 1 }; }; +#endif template<> struct packet_traits : default_packet_traits { @@ -109,7 +122,10 @@ template<> struct packet_traits : default_packet_traits HasBlend = 0, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, -#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) + HasACos = 1, + HasASin = 1, + HasATan = 1, +#if EIGEN_HAS_AVX512_MATH HasLog = 1, HasLog1p = 1, HasExpm1 = 1, @@ -118,6 +134,7 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = EIGEN_FAST_MATH, HasRsqrt = EIGEN_FAST_MATH, + HasReciprocal = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, #endif @@ -138,12 +155,13 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, size = 8, HasHalfPacket = 1, -#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) +#if EIGEN_HAS_AVX512_MATH HasLog = 1, HasExp = 1, HasSqrt = EIGEN_FAST_MATH, HasRsqrt = EIGEN_FAST_MATH, #endif 
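With pacos, pasin and patan wired up above (and the matching HasACos/HasASin/HasATan flags in packet_traits), the corresponding coefficient-wise expressions vectorize sixteen floats or eight doubles at a time. A usage sketch (assumes an AVX-512 build, e.g. -mavx512f; without it the same code simply runs on narrower packets):

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXf x = Eigen::ArrayXf::LinSpaced(16, -4.0f, 4.0f);
  // One Packet16f covers all 16 coefficients, dispatching to patan_float above.
  std::cout << x.atan().transpose() << "\n";
  return 0;
}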
+ HasATan = 1, HasCmp = 1, HasDiv = 1, HasRound = 1, @@ -153,17 +171,18 @@ template<> struct packet_traits : default_packet_traits }; }; -/* TODO Implement AVX512 for integers -template<> struct packet_traits : default_packet_traits +template<> struct packet_traits : default_packet_traits { typedef Packet16i type; + typedef Packet8i half; enum { Vectorizable = 1, AlignedOnScalar = 1, - size=8 + HasCmp = 1, + HasDiv = 1, + size=16 }; }; -*/ template <> struct unpacket_traits { @@ -171,27 +190,30 @@ struct unpacket_traits { typedef Packet8f half; typedef Packet16i integer_packet; typedef uint16_t mask_t; - enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true, masked_store_available=true }; + enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true, masked_store_available=true, masked_fpops_available=true }; }; template <> struct unpacket_traits { typedef double type; typedef Packet4d half; - enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false, masked_store_available=false }; + typedef uint8_t mask_t; + enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=true, masked_store_available=true, masked_fpops_available=true }; }; template <> struct unpacket_traits { typedef int type; typedef Packet8i half; - enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false, masked_store_available=false }; + enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=false, masked_store_available=false }; }; +#ifndef EIGEN_VECTORIZE_AVX512FP16 template<> struct unpacket_traits { typedef Eigen::half type; typedef Packet8h half; enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; +#endif template <> EIGEN_STRONG_INLINE Packet16f pset1(const float& from) { @@ -235,11 +257,25 @@ template<> EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /*a*/) { template <> EIGEN_STRONG_INLINE Packet16f pload1(const float* from) { +#if (EIGEN_COMP_GNUC != 0) || (EIGEN_COMP_CLANG != 0) + // Inline asm here helps reduce some register spilling in TRSM kernels. 
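Note that the packet_traits<int> specialization above replaces what used to be a commented-out "TODO Implement AVX512 for integers" block: Packet16i becomes a genuinely vectorizable type, including comparisons and division (the latter implemented further down by splitting the 512-bit register into two Packet8i halves). From user code this is just ordinary integer array arithmetic; a small sketch, again assuming an AVX-512 build:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXi a = Eigen::ArrayXi::LinSpaced(16, 16, 31);
  Eigen::ArrayXi b = Eigen::ArrayXi::Constant(16, 3);
  // Coefficient-wise integer quotient; eligible for the new pdiv<Packet16i>.
  std::cout << (a / b).transpose() << "\n";
  return 0;
}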
+ // See note in unrolls::gemm::microKernel in TrsmKernel.h + Packet16f ret; + __asm__ ("vbroadcastss %[mem], %[dst]" : [dst] "=v" (ret) : [mem] "m" (*from)); + return ret; +#else return _mm512_broadcastss_ps(_mm_load_ps1(from)); +#endif } template <> EIGEN_STRONG_INLINE Packet8d pload1(const double* from) { +#if (EIGEN_COMP_GNUC != 0) || (EIGEN_COMP_CLANG != 0) + Packet8d ret; + __asm__ ("vbroadcastsd %[mem], %[dst]" : [dst] "=v" (ret) : [mem] "m" (*from)); + return ret; +#else return _mm512_set1_pd(*from); +#endif } template <> @@ -254,6 +290,12 @@ EIGEN_STRONG_INLINE Packet8d plset(const double& a) { return _mm512_add_pd(_mm512_set1_pd(a), _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0)); } +template <> +EIGEN_STRONG_INLINE Packet16i plset(const int& a) { + return _mm512_add_epi32( + _mm512_set1_epi32(a), + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); +} template <> EIGEN_STRONG_INLINE Packet16f padd(const Packet16f& a, @@ -271,6 +313,21 @@ EIGEN_STRONG_INLINE Packet16i padd(const Packet16i& a, return _mm512_add_epi32(a, b); } +template <> +EIGEN_STRONG_INLINE Packet16f padd(const Packet16f& a, + const Packet16f& b, + uint16_t umask) { + __mmask16 mask = static_cast<__mmask16>(umask); + return _mm512_maskz_add_ps(mask, a, b); +} +template <> +EIGEN_STRONG_INLINE Packet8d padd(const Packet8d& a, + const Packet8d& b, + uint8_t umask) { + __mmask8 mask = static_cast<__mmask8>(umask); + return _mm512_maskz_add_pd(mask, a, b); +} + template <> EIGEN_STRONG_INLINE Packet16f psub(const Packet16f& a, const Packet16f& b) { @@ -289,11 +346,17 @@ EIGEN_STRONG_INLINE Packet16i psub(const Packet16i& a, template <> EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) { - return _mm512_sub_ps(_mm512_set1_ps(0.0), a); + const __m512i mask = _mm512_set1_epi32(0x80000000); + return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), mask)); } template <> EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) { - return _mm512_sub_pd(_mm512_set1_pd(0.0), a); + const __m512i mask = _mm512_set1_epi64(0x8000000000000000ULL); + return _mm512_castsi512_pd(_mm512_xor_epi64(_mm512_castpd_si512(a), mask)); +} +template <> +EIGEN_STRONG_INLINE Packet16i pnegate(const Packet16i& a) { + return _mm512_sub_epi32(_mm512_set1_epi32(0), a); } template <> @@ -330,12 +393,21 @@ EIGEN_STRONG_INLINE Packet16f pdiv(const Packet16f& a, const Packet16f& b) { return _mm512_div_ps(a, b); } + template <> EIGEN_STRONG_INLINE Packet8d pdiv(const Packet8d& a, const Packet8d& b) { return _mm512_div_pd(a, b); } +template <> +EIGEN_STRONG_INLINE Packet16i pdiv(const Packet16i& a, + const Packet16i& b) { + Packet8i q_lo = pdiv(_mm512_extracti64x4_epi64(a, 0), _mm512_extracti64x4_epi64(b,0)); + Packet8i q_hi = pdiv(_mm512_extracti64x4_epi64(a, 1), _mm512_extracti64x4_epi64(b, 1)); + return _mm512_inserti64x4(_mm512_castsi256_si512(q_lo), q_hi, 1); +} + #ifdef EIGEN_VECTORIZE_FMA template <> EIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b, @@ -347,6 +419,39 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b, const Packet8d& c) { return _mm512_fmadd_pd(a, b, c); } + +template <> +EIGEN_STRONG_INLINE Packet16f pmsub(const Packet16f& a, const Packet16f& b, + const Packet16f& c) { + return _mm512_fmsub_ps(a, b, c); +} +template <> +EIGEN_STRONG_INLINE Packet8d pmsub(const Packet8d& a, const Packet8d& b, + const Packet8d& c) { + return _mm512_fmsub_pd(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16f pnmadd(const Packet16f& a, const 
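The new pnegate above flips the IEEE sign bit with a single XOR instead of computing 0 - a. Besides dropping the dependency on a zero register, the XOR form is correct for signed zero: 0.0 - (+0.0) rounds to +0.0, so the subtraction-based negate loses the sign of zero, while the bit flip does not. Scalar equivalent of the trick:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Negate a float by XOR-ing the sign bit, mirroring the _mm512_xor_epi32
// based pnegate above. memcpy is the portable pre-C++20 bit cast.
float negate_via_signbit(float a) {
  std::uint32_t bits;
  std::memcpy(&bits, &a, sizeof bits);
  bits ^= 0x80000000u;  // flip the IEEE-754 sign bit
  std::memcpy(&a, &bits, sizeof a);
  return a;
}

int main() {
  std::printf("%g %g\n", negate_via_signbit(1.5f), negate_via_signbit(0.0f));  // -1.5 -0
  return 0;
}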
Packet16f& b, + const Packet16f& c) { + return _mm512_fnmadd_ps(a, b, c); +} +template <> +EIGEN_STRONG_INLINE Packet8d pnmadd(const Packet8d& a, const Packet8d& b, + const Packet8d& c) { + return _mm512_fnmadd_pd(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16f pnmsub(const Packet16f& a, const Packet16f& b, + const Packet16f& c) { + return _mm512_fnmsub_ps(a, b, c); +} +template <> +EIGEN_STRONG_INLINE Packet8d pnmsub(const Packet8d& a, const Packet8d& b, + const Packet8d& c) { + return _mm512_fnmsub_pd(a, b, c); +} #endif template <> @@ -379,6 +484,11 @@ EIGEN_STRONG_INLINE Packet8d pmin(const Packet8d& a, // Arguments are reversed to match NaN propagation behavior of std::min. return _mm512_min_pd(b, a); } +template <> +EIGEN_STRONG_INLINE Packet16i pmin(const Packet16i& a, + const Packet16i& b) { + return _mm512_min_epi32(b, a); +} template <> EIGEN_STRONG_INLINE Packet16f pmax(const Packet16f& a, @@ -392,6 +502,11 @@ EIGEN_STRONG_INLINE Packet8d pmax(const Packet8d& a, // Arguments are reversed to match NaN propagation behavior of std::max. return _mm512_max_pd(b, a); } +template <> +EIGEN_STRONG_INLINE Packet16i pmax(const Packet16i& a, + const Packet16i& b) { + return _mm512_max_epi32(b, a); +} // Add specializations for min/max with prescribed NaN progation. template<> @@ -493,10 +608,17 @@ template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, cons } template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) { - __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _CMP_EQ_OQ); + __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ); + return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu); +} +template<> EIGEN_STRONG_INLINE Packet16i pcmp_le(const Packet16i& a, const Packet16i& b) { + __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LE); + return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu); +} +template<> EIGEN_STRONG_INLINE Packet16i pcmp_lt(const Packet16i& a, const Packet16i& b) { + __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu); } - template <> EIGEN_STRONG_INLINE Packet8d pcmp_eq(const Packet8d& a, const Packet8d& b) { @@ -686,7 +808,7 @@ EIGEN_STRONG_INLINE Packet8d pload(const double* from) { template <> EIGEN_STRONG_INLINE Packet16i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512( - reinterpret_cast(from)); + reinterpret_cast(from)); } template <> @@ -708,6 +830,11 @@ EIGEN_STRONG_INLINE Packet16f ploadu(const float* from, uint16_t umas __mmask16 mask = static_cast<__mmask16>(umask); EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_maskz_loadu_ps(mask, from); } +template <> +EIGEN_STRONG_INLINE Packet8d ploadu(const double* from, uint8_t umask) { + __mmask8 mask = static_cast<__mmask8>(umask); + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_maskz_loadu_pd(mask, from); +} // Loads 8 floats from memory a returns the packet // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} @@ -746,6 +873,16 @@ EIGEN_STRONG_INLINE Packet8d ploaddup(const double* from) { } #endif +// Loads 8 integers from memory and returns the packet +// {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} +template <> +EIGEN_STRONG_INLINE Packet16i ploaddup(const int* from) { + __m256i low_half = _mm256_loadu_si256(reinterpret_cast(from)); + __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); + __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 
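The fused pmsub/pnmadd/pnmsub specializations added here round out the FMA family next to the existing pmadd. Per lane they differ only in the signs applied to the product and the addend; scalar semantics matching the _mm512_f{n}m{add,sub} intrinsics used above:

#include <cmath>

// Per-lane meaning of the four fused packet ops defined above:
double madd (double a, double b, double c) { return std::fma( a, b,  c); }  // pmadd:   a*b + c
double msub (double a, double b, double c) { return std::fma( a, b, -c); }  // pmsub:   a*b - c
double nmadd(double a, double b, double c) { return std::fma(-a, b,  c); }  // pnmadd: -a*b + c
double nmsub(double a, double b, double c) { return std::fma(-a, b, -c); }  // pnmsub: -a*b - c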
0, 0)); + return _mm512_castps_si512(pairs); +} + // Loads 4 floats from memory a returns the packet // {a0, a0 a0, a0, a1, a1, a1, a1, a2, a2, a2, a2, a3, a3, a3, a3} template <> @@ -766,6 +903,15 @@ EIGEN_STRONG_INLINE Packet8d ploadquad(const double* from) { return _mm512_insertf64x4(tmp, lane1, 1); } +// Loads 4 integers from memory and returns the packet +// {a0, a0 a0, a0, a1, a1, a1, a1, a2, a2, a2, a2, a3, a3, a3, a3} +template <> +EIGEN_STRONG_INLINE Packet16i ploadquad(const int* from) { + Packet16i tmp = _mm512_castsi128_si512(ploadu(from)); + const Packet16i scatter_mask = _mm512_set_epi32(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0); + return _mm512_permutexvar_epi32(scatter_mask, tmp); +} + template <> EIGEN_STRONG_INLINE void pstore(float* to, const Packet16f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ps(to, from); @@ -798,6 +944,40 @@ EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet16f& from, uint16 __mmask16 mask = static_cast<__mmask16>(umask); EIGEN_DEBUG_UNALIGNED_STORE return _mm512_mask_storeu_ps(to, mask, from); } +template <> +EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet8d& from, uint8_t umask) { + __mmask8 mask = static_cast<__mmask8>(umask); + EIGEN_DEBUG_UNALIGNED_STORE return _mm512_mask_storeu_pd(to, mask, from); +} + +template +EIGEN_DEVICE_FUNC inline Packet pgather(const Packet& src, const Scalar* from, + Index stride, typename unpacket_traits::mask_t umask); +template <> +EIGEN_DEVICE_FUNC inline Packet16f pgather(const Packet16f& src, + const float* from, + Index stride, + uint16_t umask) { + Packet16i stride_vector = _mm512_set1_epi32(convert_index(stride)); + Packet16i stride_multiplier = + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier); + __mmask16 mask = static_cast<__mmask16>(umask); + + return _mm512_mask_i32gather_ps(src, mask, indices, from, 4); +} +template <> +EIGEN_DEVICE_FUNC inline Packet8d pgather(const Packet8d& src, + const double* from, + Index stride, + uint8_t umask) { + Packet8i stride_vector = _mm256_set1_epi32(convert_index(stride)); + Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier); + __mmask8 mask = static_cast<__mmask8>(umask); + + return _mm512_mask_i32gather_pd(src, mask, indices, from, 8); +} template <> EIGEN_DEVICE_FUNC inline Packet16f pgather(const float* from, @@ -818,6 +998,42 @@ EIGEN_DEVICE_FUNC inline Packet8d pgather(const double* from, return _mm512_i32gather_pd(indices, from, 8); } +template <> +EIGEN_DEVICE_FUNC inline Packet16i pgather(const int* from, + Index stride) { + Packet16i stride_vector = _mm512_set1_epi32(convert_index(stride)); + Packet16i stride_multiplier = + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier); + return _mm512_i32gather_epi32(indices, from, 4); +} + +template +EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, + Index stride, typename unpacket_traits::mask_t umask); +template <> +EIGEN_DEVICE_FUNC inline void pscatter(float* to, + const Packet16f& from, + Index stride, + uint16_t umask) { + Packet16i stride_vector = _mm512_set1_epi32(convert_index(stride)); + Packet16i stride_multiplier = + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier); + __mmask16 mask = 
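The masked pgather and pscatter overloads in this region all follow one convention: lane i addresses element i*stride, a set bit in umask means the lane participates, masked-off gather lanes keep the value of the src argument (that is what the first operand of _mm512_mask_i32gather_ps is for), and masked-off scatter lanes skip the store entirely. A scalar reference of both semantics (hypothetical test helpers):

#include <cstddef>

// Per-lane semantics of the masked, strided gather/scatter above.
template <int N, typename T>
void gather_ref(T* dst, const T* src, const T* from, std::ptrdiff_t stride, unsigned mask) {
  for (int i = 0; i < N; ++i)
    dst[i] = ((mask >> i) & 1u) ? from[i * stride] : src[i];  // cleared bit keeps the src lane
}

template <int N, typename T>
void scatter_ref(T* to, const T* vals, std::ptrdiff_t stride, unsigned mask) {
  for (int i = 0; i < N; ++i)
    if ((mask >> i) & 1u) to[i * stride] = vals[i];           // cleared bit skips the store
}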
static_cast<__mmask16>(umask); + _mm512_mask_i32scatter_ps(to, mask, indices, from, 4); +} +template <> +EIGEN_DEVICE_FUNC inline void pscatter(double* to, + const Packet8d& from, + Index stride, + uint8_t umask) { + Packet8i stride_vector = _mm256_set1_epi32(convert_index(stride)); + Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier); + __mmask8 mask = static_cast<__mmask8>(umask); + _mm512_mask_i32scatter_pd(to, mask, indices, from, 8); +} template <> EIGEN_DEVICE_FUNC inline void pscatter(float* to, @@ -838,6 +1054,16 @@ EIGEN_DEVICE_FUNC inline void pscatter(double* to, Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier); _mm512_i32scatter_pd(to, indices, from, 8); } +template <> +EIGEN_DEVICE_FUNC inline void pscatter(int* to, + const Packet16i& from, + Index stride) { + Packet16i stride_vector = _mm512_set1_epi32(convert_index(stride)); + Packet16i stride_multiplier = + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier); + _mm512_i32scatter_epi32(to, indices, from, 4); +} template <> EIGEN_STRONG_INLINE void pstore1(float* to, const float& a) { @@ -882,6 +1108,11 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a) return _mm512_permutexvar_pd(_mm512_set_epi32(0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7), a); } +template<> EIGEN_STRONG_INLINE Packet16i preverse(const Packet16i& a) +{ + return _mm512_permutexvar_epi32(_mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), a); +} + template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) { // _mm512_abs_ps intrinsic not found, so hack around it @@ -893,6 +1124,15 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a), _mm512_set1_epi64(0x7fffffffffffffff))); } +template<> EIGEN_STRONG_INLINE Packet16i pabs(const Packet16i& a) +{ + return _mm512_abs_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet16h psignbit(const Packet16h& a) { return _mm256_srai_epi16(a, 15); } +template<> EIGEN_STRONG_INLINE Packet16bf psignbit(const Packet16bf& a) { return _mm256_srai_epi16(a, 15); } +template<> EIGEN_STRONG_INLINE Packet16f psignbit(const Packet16f& a) { return _mm512_castsi512_ps(_mm512_srai_epi32(_mm512_castps_si512(a), 31)); } +template<> EIGEN_STRONG_INLINE Packet8d psignbit(const Packet8d& a) { return _mm512_castsi512_pd(_mm512_srai_epi64(_mm512_castpd_si512(a), 63)); } template<> EIGEN_STRONG_INLINE Packet16f pfrexp(const Packet16f& a, Packet16f& exponent){ @@ -901,7 +1141,7 @@ EIGEN_STRONG_INLINE Packet16f pfrexp(const Packet16f& a, Packet16f& e // Extract exponent without existence of Packet8l. template<> -EIGEN_STRONG_INLINE +EIGEN_STRONG_INLINE Packet8d pfrexp_generic_get_biased_exponent(const Packet8d& a) { const Packet8d cst_exp_mask = pset1frombits(static_cast(0x7ff0000000000000ull)); #ifdef EIGEN_VECTORIZE_AVX512DQ @@ -924,11 +1164,11 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons // Clamp exponent to [-2099, 2099] const Packet8d max_exponent = pset1(2099.0); const Packet8i e = _mm512_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent)); - + // Split 2^e into four factors and multiply. 
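The "four factors" comment closing this chunk is worth unpacking. For double, the clamped exponent e may reach +/-2099 while a single finite power of two only spans 2^-1074 to 2^1023, so pldexp applies 2^e as three multiplications by 2^b with b = floor(e/4), followed by one multiplication by 2^(e-3b); every factor stays individually representable. A scalar model of the same sequencing (std::ldexp stands in for the biased-exponent bit construction the kernel performs with AVX2 integer ops):

#include <cmath>
#include <cstdio>

double ldexp_model(double a, int e) {
  e = e < -2099 ? -2099 : (e > 2099 ? 2099 : e);  // clamp as the kernel does
  int b = e >> 2;  // floor(e/4); arithmetic shift assumed, like parithmetic_shift_right<2>
  double c = std::ldexp(1.0, b);                  // 2^b, |b| <= 524, always finite
  double out = ((a * c) * c) * c;                 // a * 2^(3b)
  return out * std::ldexp(1.0, e - 3 * b);        // * 2^(e - 3b)
}

int main() {
  std::printf("%g\n", ldexp_model(1.0, -1074));   // reaches the smallest subnormal stepwise
  return 0;
}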
const Packet8i bias = pset1(1023); Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4) - + // 2^b const Packet8i permute_idx = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); Packet8i hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx); @@ -936,7 +1176,7 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52); Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1)); Packet8d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b) - + // 2^(e - 3b) b = psub(psub(psub(e, b), b), b); // e - 3b hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx); @@ -952,6 +1192,11 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \ __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1) + +// AVX512F does not define _mm512_extracti32x8_epi32 to extract _m256i from _m512i +#define EIGEN_EXTRACT_8i_FROM_16i(INPUT, OUTPUT) \ + __m256i OUTPUT##_0 = _mm512_extracti32x8_epi32(INPUT, 0); \ + __m256i OUTPUT##_1 = _mm512_extracti32x8_epi32(INPUT, 1) #else #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \ @@ -959,12 +1204,23 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons _mm512_extractf32x4_ps(INPUT, 1), 1); \ __m256 OUTPUT##_1 = _mm256_insertf128_ps( \ _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 2)), \ - _mm512_extractf32x4_ps(INPUT, 3), 1); + _mm512_extractf32x4_ps(INPUT, 3), 1) + +#define EIGEN_EXTRACT_8i_FROM_16i(INPUT, OUTPUT) \ + __m256i OUTPUT##_0 = _mm256_insertf128_si256( \ + _mm256_castsi128_si256(_mm512_extracti32x4_epi32(INPUT, 0)), \ + _mm512_extracti32x4_epi32(INPUT, 1), 1); \ + __m256i OUTPUT##_1 = _mm256_insertf128_si256( \ + _mm256_castsi128_si256(_mm512_extracti32x4_epi32(INPUT, 2)), \ + _mm512_extracti32x4_epi32(INPUT, 3), 1) #endif #ifdef EIGEN_VECTORIZE_AVX512DQ #define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \ OUTPUT = _mm512_insertf32x8(_mm512_castps256_ps512(INPUTA), INPUTB, 1); + +#define EIGEN_INSERT_8i_INTO_16i(OUTPUT, INPUTA, INPUTB) \ + OUTPUT = _mm512_inserti32x8(_mm512_castsi256_si512(INPUTA), INPUTB, 1); #else #define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \ OUTPUT = _mm512_undefined_ps(); \ @@ -972,6 +1228,13 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 1), 1); \ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3); + +#define EIGEN_INSERT_8i_INTO_16i(OUTPUT, INPUTA, INPUTB) \ + OUTPUT = _mm512_undefined_epi32(); \ + OUTPUT = _mm512_inserti32x4(OUTPUT, _mm256_extractf128_si256(INPUTA, 0), 0); \ + OUTPUT = _mm512_inserti32x4(OUTPUT, _mm256_extractf128_si256(INPUTA, 1), 1); \ + OUTPUT = _mm512_inserti32x4(OUTPUT, _mm256_extractf128_si256(INPUTB, 0), 2); \ + OUTPUT = _mm512_inserti32x4(OUTPUT, _mm256_extractf128_si256(INPUTB, 1), 3); #endif template <> @@ -1000,6 +1263,24 @@ EIGEN_STRONG_INLINE double predux(const Packet8d& a) { __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0))); } +template <> +EIGEN_STRONG_INLINE int predux(const Packet16i& a) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + __m256i lane0 = _mm512_extracti32x8_epi32(a, 0); + __m256i lane1 = 
_mm512_extracti32x8_epi32(a, 1); + Packet8i x = _mm256_add_epi32(lane0, lane1); + return predux(x); +#else + __m128i lane0 = _mm512_extracti32x4_epi32(a, 0); + __m128i lane1 = _mm512_extracti32x4_epi32(a, 1); + __m128i lane2 = _mm512_extracti32x4_epi32(a, 2); + __m128i lane3 = _mm512_extracti32x4_epi32(a, 3); + __m128i sum = _mm_add_epi32(_mm_add_epi32(lane0, lane1), _mm_add_epi32(lane2, lane3)); + sum = _mm_hadd_epi32(sum, sum); + sum = _mm_hadd_epi32(sum, _mm_castps_si128(_mm_permute_ps(_mm_castsi128_ps(sum), 1))); + return _mm_cvtsi128_si32(sum); +#endif +} template <> EIGEN_STRONG_INLINE Packet8f predux_half_dowto4(const Packet16f& a) { @@ -1023,6 +1304,22 @@ EIGEN_STRONG_INLINE Packet4d predux_half_dowto4(const Packet8d& a) { __m256d lane1 = _mm512_extractf64x4_pd(a, 1); return _mm256_add_pd(lane0, lane1); } +template <> +EIGEN_STRONG_INLINE Packet8i predux_half_dowto4(const Packet16i& a) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + __m256i lane0 = _mm512_extracti32x8_epi32(a, 0); + __m256i lane1 = _mm512_extracti32x8_epi32(a, 1); + return _mm256_add_epi32(lane0, lane1); +#else + __m128i lane0 = _mm512_extracti32x4_epi32(a, 0); + __m128i lane1 = _mm512_extracti32x4_epi32(a, 1); + __m128i lane2 = _mm512_extracti32x4_epi32(a, 2); + __m128i lane3 = _mm512_extracti32x4_epi32(a, 3); + __m128i sum0 = _mm_add_epi32(lane0, lane2); + __m128i sum1 = _mm_add_epi32(lane1, lane3); + return _mm256_inserti128_si256(_mm256_castsi128_si256(sum0), sum1, 1); +#endif +} template <> EIGEN_STRONG_INLINE float predux_mul(const Packet16f& a) { @@ -1099,7 +1396,11 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x) return !_mm512_kortestz(tmp,tmp); } - +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16i& x) +{ + __mmask16 tmp = _mm512_test_epi32_mask(x,x); + return !_mm512_kortestz(tmp,tmp); +} #define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \ EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]); @@ -1219,6 +1520,44 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[2 * INDEX], \ INPUT[2 * INDEX + STRIDE]); +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { + __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0],kernel.packet[1]); + __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0],kernel.packet[1]); + __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2],kernel.packet[3]); + __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2],kernel.packet[3]); + __m512 T4 = _mm512_unpacklo_ps(kernel.packet[4],kernel.packet[5]); + __m512 T5 = _mm512_unpackhi_ps(kernel.packet[4],kernel.packet[5]); + __m512 T6 = _mm512_unpacklo_ps(kernel.packet[6],kernel.packet[7]); + __m512 T7 = _mm512_unpackhi_ps(kernel.packet[6],kernel.packet[7]); + + kernel.packet[0] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T0),_mm512_castps_pd(T2))); + kernel.packet[1] = _mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T0),_mm512_castps_pd(T2))); + kernel.packet[2] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T1),_mm512_castps_pd(T3))); + kernel.packet[3] = _mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T1),_mm512_castps_pd(T3))); + kernel.packet[4] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T4),_mm512_castps_pd(T6))); + kernel.packet[5] = _mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T4),_mm512_castps_pd(T6))); + kernel.packet[6] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T5),_mm512_castps_pd(T7))); + kernel.packet[7] = 
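The new Packet16i reductions in this stretch mirror the existing float ones: predux is a full 16-lane horizontal sum (one 256-bit fold plus the Packet8i predux when AVX512DQ provides the 256-bit extract, otherwise four 128-bit lanes), and predux_half_dowto4 folds the two 256-bit halves lane by lane. Scalar reference semantics, as a sketch only:

// What the intrinsic sequences above compute, lane for lane.
int predux_ref(const int v[16]) {
  int sum = 0;
  for (int i = 0; i < 16; ++i) sum += v[i];  // full horizontal sum
  return sum;
}

void predux_half_ref(const int v[16], int out[8]) {
  for (int i = 0; i < 8; ++i) out[i] = v[i] + v[i + 8];  // fold the upper half onto the lower
}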
_mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T5),_mm512_castps_pd(T7))); + + T0 = _mm512_shuffle_f32x4(kernel.packet[0], kernel.packet[4], 0x44); + T1 = _mm512_shuffle_f32x4(kernel.packet[0], kernel.packet[4], 0xee); + T2 = _mm512_shuffle_f32x4(kernel.packet[1], kernel.packet[5], 0x44); + T3 = _mm512_shuffle_f32x4(kernel.packet[1], kernel.packet[5], 0xee); + T4 = _mm512_shuffle_f32x4(kernel.packet[2], kernel.packet[6], 0x44); + T5 = _mm512_shuffle_f32x4(kernel.packet[2], kernel.packet[6], 0xee); + T6 = _mm512_shuffle_f32x4(kernel.packet[3], kernel.packet[7], 0x44); + T7 = _mm512_shuffle_f32x4(kernel.packet[3], kernel.packet[7], 0xee); + + kernel.packet[0] = _mm512_shuffle_f32x4(T0, T2, 0x88); + kernel.packet[2] = _mm512_shuffle_f32x4(T0, T2, 0xdd); + kernel.packet[1] = _mm512_shuffle_f32x4(T4, T6, 0x88); + kernel.packet[3] = _mm512_shuffle_f32x4(T4, T6, 0xdd); + kernel.packet[4] = _mm512_shuffle_f32x4(T1, T3, 0x88); + kernel.packet[6] = _mm512_shuffle_f32x4(T1, T3, 0xdd); + kernel.packet[5] = _mm512_shuffle_f32x4(T5, T7, 0x88); + kernel.packet[7] = _mm512_shuffle_f32x4(T5, T7, 0xdd); +} + EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]); __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]); @@ -1295,68 +1634,216 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { } EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { - __m512d T0 = _mm512_unpacklo_pd(kernel.packet[0], kernel.packet[1]); - __m512d T1 = _mm512_unpackhi_pd(kernel.packet[0], kernel.packet[1]); - __m512d T2 = _mm512_unpacklo_pd(kernel.packet[2], kernel.packet[3]); - __m512d T3 = _mm512_unpackhi_pd(kernel.packet[2], kernel.packet[3]); - __m512d T4 = _mm512_unpacklo_pd(kernel.packet[4], kernel.packet[5]); - __m512d T5 = _mm512_unpackhi_pd(kernel.packet[4], kernel.packet[5]); - __m512d T6 = _mm512_unpacklo_pd(kernel.packet[6], kernel.packet[7]); - __m512d T7 = _mm512_unpackhi_pd(kernel.packet[6], kernel.packet[7]); + __m512d T0 = _mm512_unpacklo_pd(kernel.packet[0],kernel.packet[1]); + __m512d T1 = _mm512_unpackhi_pd(kernel.packet[0],kernel.packet[1]); + __m512d T2 = _mm512_unpacklo_pd(kernel.packet[2],kernel.packet[3]); + __m512d T3 = _mm512_unpackhi_pd(kernel.packet[2],kernel.packet[3]); + __m512d T4 = _mm512_unpacklo_pd(kernel.packet[4],kernel.packet[5]); + __m512d T5 = _mm512_unpackhi_pd(kernel.packet[4],kernel.packet[5]); + __m512d T6 = _mm512_unpacklo_pd(kernel.packet[6],kernel.packet[7]); + __m512d T7 = _mm512_unpackhi_pd(kernel.packet[6],kernel.packet[7]); - PacketBlock tmp; + kernel.packet[0] = _mm512_permutex_pd(T2, 0x4E); + kernel.packet[0] = _mm512_mask_blend_pd(0xCC, T0, kernel.packet[0]); + kernel.packet[2] = _mm512_permutex_pd(T0, 0x4E); + kernel.packet[2] = _mm512_mask_blend_pd(0xCC, kernel.packet[2], T2); + kernel.packet[1] = _mm512_permutex_pd(T3, 0x4E); + kernel.packet[1] = _mm512_mask_blend_pd(0xCC, T1, kernel.packet[1]); + kernel.packet[3] = _mm512_permutex_pd(T1, 0x4E); + kernel.packet[3] = _mm512_mask_blend_pd(0xCC, kernel.packet[3], T3); + kernel.packet[4] = _mm512_permutex_pd(T6, 0x4E); + kernel.packet[4] = _mm512_mask_blend_pd(0xCC, T4, kernel.packet[4]); + kernel.packet[6] = _mm512_permutex_pd(T4, 0x4E); + kernel.packet[6] = _mm512_mask_blend_pd(0xCC, kernel.packet[6], T6); + kernel.packet[5] = _mm512_permutex_pd(T7, 0x4E); + kernel.packet[5] = _mm512_mask_blend_pd(0xCC, T5, kernel.packet[5]); + kernel.packet[7] = _mm512_permutex_pd(T5, 0x4E); + kernel.packet[7] = 
_mm512_mask_blend_pd(0xCC, kernel.packet[7], T7); - tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0), - _mm512_extractf64x4_pd(T2, 0), 0x20); - tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0), - _mm512_extractf64x4_pd(T3, 0), 0x20); - tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0), - _mm512_extractf64x4_pd(T2, 0), 0x31); - tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0), - _mm512_extractf64x4_pd(T3, 0), 0x31); + T0 = _mm512_shuffle_f64x2(kernel.packet[4], kernel.packet[4], 0x4E); + T0 = _mm512_mask_blend_pd(0xF0, kernel.packet[0], T0); + T4 = _mm512_shuffle_f64x2(kernel.packet[0], kernel.packet[0], 0x4E); + T4 = _mm512_mask_blend_pd(0xF0, T4, kernel.packet[4]); + T1 = _mm512_shuffle_f64x2(kernel.packet[5], kernel.packet[5], 0x4E); + T1 = _mm512_mask_blend_pd(0xF0, kernel.packet[1], T1); + T5 = _mm512_shuffle_f64x2(kernel.packet[1], kernel.packet[1], 0x4E); + T5 = _mm512_mask_blend_pd(0xF0, T5, kernel.packet[5]); + T2 = _mm512_shuffle_f64x2(kernel.packet[6], kernel.packet[6], 0x4E); + T2 = _mm512_mask_blend_pd(0xF0, kernel.packet[2], T2); + T6 = _mm512_shuffle_f64x2(kernel.packet[2], kernel.packet[2], 0x4E); + T6 = _mm512_mask_blend_pd(0xF0, T6, kernel.packet[6]); + T3 = _mm512_shuffle_f64x2(kernel.packet[7], kernel.packet[7], 0x4E); + T3 = _mm512_mask_blend_pd(0xF0, kernel.packet[3], T3); + T7 = _mm512_shuffle_f64x2(kernel.packet[3], kernel.packet[3], 0x4E); + T7 = _mm512_mask_blend_pd(0xF0, T7, kernel.packet[7]); - tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1), - _mm512_extractf64x4_pd(T2, 1), 0x20); - tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1), - _mm512_extractf64x4_pd(T3, 1), 0x20); - tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1), - _mm512_extractf64x4_pd(T2, 1), 0x31); - tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1), - _mm512_extractf64x4_pd(T3, 1), 0x31); - - tmp.packet[8] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0), - _mm512_extractf64x4_pd(T6, 0), 0x20); - tmp.packet[9] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0), - _mm512_extractf64x4_pd(T7, 0), 0x20); - tmp.packet[10] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0), - _mm512_extractf64x4_pd(T6, 0), 0x31); - tmp.packet[11] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0), - _mm512_extractf64x4_pd(T7, 0), 0x31); - - tmp.packet[12] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1), - _mm512_extractf64x4_pd(T6, 1), 0x20); - tmp.packet[13] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1), - _mm512_extractf64x4_pd(T7, 1), 0x20); - tmp.packet[14] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1), - _mm512_extractf64x4_pd(T6, 1), 0x31); - tmp.packet[15] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1), - _mm512_extractf64x4_pd(T7, 1), 0x31); - - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 0, 8); - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 1, 8); - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 2, 8); - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 3, 8); - - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 4, 8); - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 5, 8); - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 6, 8); - PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 7, 8); + kernel.packet[0] = T0; kernel.packet[1] = T1; + kernel.packet[2] = T2; kernel.packet[3] = T3; + kernel.packet[4] = T4; kernel.packet[5] = T5; + kernel.packet[6] = T6; kernel.packet[7] = T7; } + +#define PACK_OUTPUT_I32(OUTPUT, INPUT, INDEX, STRIDE) 
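Both ptranspose rewrites in this region (the 8x16 float kernel above, and the 8x8 double kernel just completed) now stay entirely inside 512-bit registers using unpacks, cross-lane shuffles and mask blends, where the deleted code round-tripped through 256-bit extracts and re-inserts. The observable behaviour is unchanged: each kernel is a plain in-register matrix transpose, so a scalar oracle for spot-checking is a single loop (hypothetical test helper):

// Reference: element (i, j) of the packet block swaps with element (j, i).
template <int N, typename T>
void transpose_ref(T m[N][N]) {
  for (int i = 0; i < N; ++i)
    for (int j = i + 1; j < N; ++j) {
      T t = m[i][j];
      m[i][j] = m[j][i];
      m[j][i] = t;
    }
}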
\ + EIGEN_INSERT_8i_INTO_16i(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]); + +#define PACK_OUTPUT_I32_2(OUTPUT, INPUT, INDEX, STRIDE) \ + EIGEN_INSERT_8i_INTO_16i(OUTPUT[INDEX], INPUT[2 * INDEX], \ + INPUT[2 * INDEX + STRIDE]); + +#define SHUFFLE_EPI32(A, B, M) \ + _mm512_castps_si512(_mm512_shuffle_ps(_mm512_castsi512_ps(A), _mm512_castsi512_ps(B), M)) + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { + __m512i T0 = _mm512_unpacklo_epi32(kernel.packet[0], kernel.packet[1]); + __m512i T1 = _mm512_unpackhi_epi32(kernel.packet[0], kernel.packet[1]); + __m512i T2 = _mm512_unpacklo_epi32(kernel.packet[2], kernel.packet[3]); + __m512i T3 = _mm512_unpackhi_epi32(kernel.packet[2], kernel.packet[3]); + __m512i T4 = _mm512_unpacklo_epi32(kernel.packet[4], kernel.packet[5]); + __m512i T5 = _mm512_unpackhi_epi32(kernel.packet[4], kernel.packet[5]); + __m512i T6 = _mm512_unpacklo_epi32(kernel.packet[6], kernel.packet[7]); + __m512i T7 = _mm512_unpackhi_epi32(kernel.packet[6], kernel.packet[7]); + __m512i T8 = _mm512_unpacklo_epi32(kernel.packet[8], kernel.packet[9]); + __m512i T9 = _mm512_unpackhi_epi32(kernel.packet[8], kernel.packet[9]); + __m512i T10 = _mm512_unpacklo_epi32(kernel.packet[10], kernel.packet[11]); + __m512i T11 = _mm512_unpackhi_epi32(kernel.packet[10], kernel.packet[11]); + __m512i T12 = _mm512_unpacklo_epi32(kernel.packet[12], kernel.packet[13]); + __m512i T13 = _mm512_unpackhi_epi32(kernel.packet[12], kernel.packet[13]); + __m512i T14 = _mm512_unpacklo_epi32(kernel.packet[14], kernel.packet[15]); + __m512i T15 = _mm512_unpackhi_epi32(kernel.packet[14], kernel.packet[15]); + __m512i S0 = SHUFFLE_EPI32(T0, T2, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S1 = SHUFFLE_EPI32(T0, T2, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S2 = SHUFFLE_EPI32(T1, T3, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S3 = SHUFFLE_EPI32(T1, T3, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S4 = SHUFFLE_EPI32(T4, T6, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S5 = SHUFFLE_EPI32(T4, T6, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S6 = SHUFFLE_EPI32(T5, T7, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S7 = SHUFFLE_EPI32(T5, T7, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S8 = SHUFFLE_EPI32(T8, T10, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S9 = SHUFFLE_EPI32(T8, T10, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S10 = SHUFFLE_EPI32(T9, T11, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S11 = SHUFFLE_EPI32(T9, T11, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S12 = SHUFFLE_EPI32(T12, T14, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S13 = SHUFFLE_EPI32(T12, T14, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S14 = SHUFFLE_EPI32(T13, T15, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S15 = SHUFFLE_EPI32(T13, T15, _MM_SHUFFLE(3, 2, 3, 2)); + + EIGEN_EXTRACT_8i_FROM_16i(S0, S0); + EIGEN_EXTRACT_8i_FROM_16i(S1, S1); + EIGEN_EXTRACT_8i_FROM_16i(S2, S2); + EIGEN_EXTRACT_8i_FROM_16i(S3, S3); + EIGEN_EXTRACT_8i_FROM_16i(S4, S4); + EIGEN_EXTRACT_8i_FROM_16i(S5, S5); + EIGEN_EXTRACT_8i_FROM_16i(S6, S6); + EIGEN_EXTRACT_8i_FROM_16i(S7, S7); + EIGEN_EXTRACT_8i_FROM_16i(S8, S8); + EIGEN_EXTRACT_8i_FROM_16i(S9, S9); + EIGEN_EXTRACT_8i_FROM_16i(S10, S10); + EIGEN_EXTRACT_8i_FROM_16i(S11, S11); + EIGEN_EXTRACT_8i_FROM_16i(S12, S12); + EIGEN_EXTRACT_8i_FROM_16i(S13, S13); + EIGEN_EXTRACT_8i_FROM_16i(S14, S14); + EIGEN_EXTRACT_8i_FROM_16i(S15, S15); + + PacketBlock tmp; + + tmp.packet[0] = _mm256_permute2f128_si256(S0_0, S4_0, 0x20); + tmp.packet[1] = _mm256_permute2f128_si256(S1_0, S5_0, 0x20); + tmp.packet[2] = _mm256_permute2f128_si256(S2_0, S6_0, 0x20); + tmp.packet[3] = _mm256_permute2f128_si256(S3_0, S7_0, 0x20); + tmp.packet[4] 
= _mm256_permute2f128_si256(S0_0, S4_0, 0x31); + tmp.packet[5] = _mm256_permute2f128_si256(S1_0, S5_0, 0x31); + tmp.packet[6] = _mm256_permute2f128_si256(S2_0, S6_0, 0x31); + tmp.packet[7] = _mm256_permute2f128_si256(S3_0, S7_0, 0x31); + + tmp.packet[8] = _mm256_permute2f128_si256(S0_1, S4_1, 0x20); + tmp.packet[9] = _mm256_permute2f128_si256(S1_1, S5_1, 0x20); + tmp.packet[10] = _mm256_permute2f128_si256(S2_1, S6_1, 0x20); + tmp.packet[11] = _mm256_permute2f128_si256(S3_1, S7_1, 0x20); + tmp.packet[12] = _mm256_permute2f128_si256(S0_1, S4_1, 0x31); + tmp.packet[13] = _mm256_permute2f128_si256(S1_1, S5_1, 0x31); + tmp.packet[14] = _mm256_permute2f128_si256(S2_1, S6_1, 0x31); + tmp.packet[15] = _mm256_permute2f128_si256(S3_1, S7_1, 0x31); + + // Second set of _m256 outputs + tmp.packet[16] = _mm256_permute2f128_si256(S8_0, S12_0, 0x20); + tmp.packet[17] = _mm256_permute2f128_si256(S9_0, S13_0, 0x20); + tmp.packet[18] = _mm256_permute2f128_si256(S10_0, S14_0, 0x20); + tmp.packet[19] = _mm256_permute2f128_si256(S11_0, S15_0, 0x20); + tmp.packet[20] = _mm256_permute2f128_si256(S8_0, S12_0, 0x31); + tmp.packet[21] = _mm256_permute2f128_si256(S9_0, S13_0, 0x31); + tmp.packet[22] = _mm256_permute2f128_si256(S10_0, S14_0, 0x31); + tmp.packet[23] = _mm256_permute2f128_si256(S11_0, S15_0, 0x31); + + tmp.packet[24] = _mm256_permute2f128_si256(S8_1, S12_1, 0x20); + tmp.packet[25] = _mm256_permute2f128_si256(S9_1, S13_1, 0x20); + tmp.packet[26] = _mm256_permute2f128_si256(S10_1, S14_1, 0x20); + tmp.packet[27] = _mm256_permute2f128_si256(S11_1, S15_1, 0x20); + tmp.packet[28] = _mm256_permute2f128_si256(S8_1, S12_1, 0x31); + tmp.packet[29] = _mm256_permute2f128_si256(S9_1, S13_1, 0x31); + tmp.packet[30] = _mm256_permute2f128_si256(S10_1, S14_1, 0x31); + tmp.packet[31] = _mm256_permute2f128_si256(S11_1, S15_1, 0x31); + + // Pack them into the output + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 0, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 1, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 2, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 3, 16); + + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 4, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 5, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 6, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 7, 16); + + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 8, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 9, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 10, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 11, 16); + + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 12, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 13, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 14, 16); + PACK_OUTPUT_I32(kernel.packet, tmp.packet, 15, 16); +} + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { + __m512i T0 = _mm512_unpacklo_epi32(kernel.packet[0], kernel.packet[1]); + __m512i T1 = _mm512_unpackhi_epi32(kernel.packet[0], kernel.packet[1]); + __m512i T2 = _mm512_unpacklo_epi32(kernel.packet[2], kernel.packet[3]); + __m512i T3 = _mm512_unpackhi_epi32(kernel.packet[2], kernel.packet[3]); + + __m512i S0 = SHUFFLE_EPI32(T0, T2, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S1 = SHUFFLE_EPI32(T0, T2, _MM_SHUFFLE(3, 2, 3, 2)); + __m512i S2 = SHUFFLE_EPI32(T1, T3, _MM_SHUFFLE(1, 0, 1, 0)); + __m512i S3 = SHUFFLE_EPI32(T1, T3, _MM_SHUFFLE(3, 2, 3, 2)); + + EIGEN_EXTRACT_8i_FROM_16i(S0, S0); + EIGEN_EXTRACT_8i_FROM_16i(S1, S1); + EIGEN_EXTRACT_8i_FROM_16i(S2, S2); + EIGEN_EXTRACT_8i_FROM_16i(S3, S3); + + PacketBlock tmp; + + tmp.packet[0] = 
_mm256_permute2f128_si256(S0_0, S1_0, 0x20); + tmp.packet[1] = _mm256_permute2f128_si256(S2_0, S3_0, 0x20); + tmp.packet[2] = _mm256_permute2f128_si256(S0_0, S1_0, 0x31); + tmp.packet[3] = _mm256_permute2f128_si256(S2_0, S3_0, 0x31); + + tmp.packet[4] = _mm256_permute2f128_si256(S0_1, S1_1, 0x20); + tmp.packet[5] = _mm256_permute2f128_si256(S2_1, S3_1, 0x20); + tmp.packet[6] = _mm256_permute2f128_si256(S0_1, S1_1, 0x31); + tmp.packet[7] = _mm256_permute2f128_si256(S2_1, S3_1, 0x31); + + PACK_OUTPUT_I32_2(kernel.packet, tmp.packet, 0, 1); + PACK_OUTPUT_I32_2(kernel.packet, tmp.packet, 1, 1); + PACK_OUTPUT_I32_2(kernel.packet, tmp.packet, 2, 1); + PACK_OUTPUT_I32_2(kernel.packet, tmp.packet, 3, 1); +} + template <> EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& /*ifPacket*/, const Packet16f& /*thenPacket*/, const Packet16f& /*elsePacket*/) { - assert(false && "To be implemented"); + eigen_assert(false && "To be implemented"); return Packet16f(); } template <> @@ -1426,64 +1913,15 @@ ploadquad(const Eigen::half* from) { } EIGEN_STRONG_INLINE Packet16f half2float(const Packet16h& a) { -#ifdef EIGEN_HAS_FP16_C return _mm512_cvtph_ps(a); -#else - EIGEN_ALIGN64 half aux[16]; - pstore(aux, a); - float f0(aux[0]); - float f1(aux[1]); - float f2(aux[2]); - float f3(aux[3]); - float f4(aux[4]); - float f5(aux[5]); - float f6(aux[6]); - float f7(aux[7]); - float f8(aux[8]); - float f9(aux[9]); - float fa(aux[10]); - float fb(aux[11]); - float fc(aux[12]); - float fd(aux[13]); - float fe(aux[14]); - float ff(aux[15]); - - return _mm512_set_ps( - ff, fe, fd, fc, fb, fa, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0); -#endif } EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) { -#ifdef EIGEN_HAS_FP16_C return _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC); -#else - EIGEN_ALIGN64 float aux[16]; - pstore(aux, a); - half h0(aux[0]); - half h1(aux[1]); - half h2(aux[2]); - half h3(aux[3]); - half h4(aux[4]); - half h5(aux[5]); - half h6(aux[6]); - half h7(aux[7]); - half h8(aux[8]); - half h9(aux[9]); - half ha(aux[10]); - half hb(aux[11]); - half hc(aux[12]); - half hd(aux[13]); - half he(aux[14]); - half hf(aux[15]); - - return _mm256_set_epi16( - hf.x, he.x, hd.x, hc.x, hb.x, ha.x, h9.x, h8.x, - h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x); -#endif } template<> EIGEN_STRONG_INLINE Packet16h ptrue(const Packet16h& a) { - return ptrue(Packet8i(a)); + return Packet16h(ptrue(Packet8i(a))); } template <> @@ -1512,16 +1950,16 @@ EIGEN_STRONG_INLINE Packet16h plset(const half& a) { template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) { // in some cases Packet8i is a wrapper around __m256i, so we need to // cast to Packet8i to call the correct overload. 
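The comment above is the crux of the next run of one-line fixes. Packet16h and Packet16bf are both eigen_packet_wrapper types over __m256i, distinguished only by a tag parameter; por(Packet8i(a), Packet8i(b)) therefore returns a Packet8i, and letting it convert back implicitly erases which half-precision type was meant. Wrapping the result explicitly, as the + lines below do, keeps the two types distinct at overload resolution. A stripped-down model of why the tag matters (hypothetical wrapper, much simpler than Eigen's; compile with AVX2 enabled, e.g. -mavx2):

#include <immintrin.h>

// Two distinct wrapper types over the same raw register: an overload taking
// HalfPack cannot be fed a Bf16Pack even though both hold a __m256i.
template <int Tag>
struct wrapper256 {
  __m256i v;
  explicit wrapper256(__m256i raw) : v(raw) {}
};
using HalfPack = wrapper256<1>;
using Bf16Pack = wrapper256<2>;

HalfPack por(const HalfPack& a, const HalfPack& b) {
  return HalfPack(_mm256_or_si256(a.v, b.v));  // construct the result type explicitly
}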
- return por(Packet8i(a),Packet8i(b)); + return Packet16h(por(Packet8i(a),Packet8i(b))); } template<> EIGEN_STRONG_INLINE Packet16h pxor(const Packet16h& a,const Packet16h& b) { - return pxor(Packet8i(a),Packet8i(b)); + return Packet16h(pxor(Packet8i(a),Packet8i(b))); } template<> EIGEN_STRONG_INLINE Packet16h pand(const Packet16h& a,const Packet16h& b) { - return pand(Packet8i(a),Packet8i(b)); + return Packet16h(pand(Packet8i(a),Packet8i(b))); } template<> EIGEN_STRONG_INLINE Packet16h pandnot(const Packet16h& a,const Packet16h& b) { - return pandnot(Packet8i(a),Packet8i(b)); + return Packet16h(pandnot(Packet8i(a),Packet8i(b))); } template<> EIGEN_STRONG_INLINE Packet16h pselect(const Packet16h& mask, const Packet16h& a, const Packet16h& b) { @@ -1569,6 +2007,7 @@ template<> EIGEN_STRONG_INLINE Packet16h pnegate(const Packet16h& a) { return _mm256_xor_si256(a, sign_mask); } +#ifndef EIGEN_VECTORIZE_AVX512FP16 template<> EIGEN_STRONG_INLINE Packet16h padd(const Packet16h& a, const Packet16h& b) { Packet16f af = half2float(a); Packet16f bf = half2float(b); @@ -1602,6 +2041,8 @@ template<> EIGEN_STRONG_INLINE half predux(const Packet16h& from) { return half(predux(from_float)); } +#endif + template <> EIGEN_STRONG_INLINE Packet8h predux_half_dowto4(const Packet16h& a) { Packet8h lane0 = _mm256_extractf128_si256(a, 0); @@ -1852,7 +2293,7 @@ struct packet_traits : default_packet_traits { HasInsert = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, -#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) +#if EIGEN_HAS_AVX512_MATH #ifdef EIGEN_VECTORIZE_AVX512DQ HasLog = 1, // Currently fails test with bad accuracy. HasLog1p = 1, @@ -1915,7 +2356,6 @@ EIGEN_STRONG_INLINE void pstoreu(bfloat16* to, template<> EIGEN_STRONG_INLINE Packet16bf ploaddup(const bfloat16* from) { - Packet16bf r; unsigned short a = from[0].value; unsigned short b = from[1].value; unsigned short c = from[2].value; @@ -1929,7 +2369,6 @@ ploaddup(const bfloat16* from) { template<> EIGEN_STRONG_INLINE Packet16bf ploadquad(const bfloat16* from) { - Packet16bf r; unsigned short a = from[0].value; unsigned short b = from[1].value; unsigned short c = from[2].value; @@ -1947,7 +2386,7 @@ EIGEN_STRONG_INLINE Packet16bf F32ToBf16(const Packet16f& a) { #if defined(EIGEN_VECTORIZE_AVX512BF16) && EIGEN_GNUC_AT_LEAST(10, 1) // Since GCC 10.1 supports avx512bf16 and C style explicit cast - // (C++ static_cast is not supported yet), do converion via intrinsic + // (C++ static_cast is not supported yet), do conversion via intrinsic // and register path for performance. 
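For context on F32ToBf16, which continues below: when the AVX512BF16 instruction is unavailable, the fallback truncates each float to its top 16 bits with round-to-nearest-even applied to the discarded half. That rounding is compact in scalar form (a sketch of the standard bit trick, not code copied from Eigen; NaN handling omitted):

#include <cstdint>
#include <cstdio>
#include <cstring>

// float -> bfloat16 with round-to-nearest-even on the low 16 bits.
std::uint16_t f32_to_bf16_rne(float f) {
  std::uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  std::uint32_t lsb = (bits >> 16) & 1u;  // last bit that will be kept
  bits += 0x7fffu + lsb;                  // round up above half, ties to even
  return static_cast<std::uint16_t>(bits >> 16);
}

int main() {
  std::printf("0x%04x\n", f32_to_bf16_rne(1.0f));  // 0x3f80, i.e. bfloat16(1.0)
  return 0;
}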
r = (__m256i)(_mm512_cvtneps_pbh(a)); @@ -1978,28 +2417,28 @@ EIGEN_STRONG_INLINE Packet16bf F32ToBf16(const Packet16f& a) { template <> EIGEN_STRONG_INLINE Packet16bf ptrue(const Packet16bf& a) { - return ptrue(a); + return Packet16bf(ptrue(Packet8i(a))); } template <> EIGEN_STRONG_INLINE Packet16bf por(const Packet16bf& a, const Packet16bf& b) { - return por(a, b); + return Packet16bf(por(Packet8i(a), Packet8i(b))); } template <> EIGEN_STRONG_INLINE Packet16bf pxor(const Packet16bf& a, const Packet16bf& b) { - return pxor(a, b); + return Packet16bf(pxor(Packet8i(a), Packet8i(b))); } template <> EIGEN_STRONG_INLINE Packet16bf pand(const Packet16bf& a, const Packet16bf& b) { - return pand(a, b); + return Packet16bf(pand(Packet8i(a), Packet8i(b))); } template <> EIGEN_STRONG_INLINE Packet16bf pandnot(const Packet16bf& a, const Packet16bf& b) { - return pandnot(a, b); + return Packet16bf(pandnot(Packet8i(a), Packet8i(b))); } template <> diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/libs/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h new file mode 100644 index 0000000..13f285e --- /dev/null +++ b/libs/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h @@ -0,0 +1,877 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H +#define EIGEN_PACKET_MATH_FP16_AVX512_H + +#include "../../InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// Disable the code for older versions of gcc that don't support many of the required avx512 math instrinsics. 
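The new PacketMathFP16.h beginning here gives Eigen::half a native 512-bit packet, Packet32h over the compiler's __m512h, when EIGEN_VECTORIZE_AVX512FP16 is defined; previously, half arithmetic on AVX-512 went through Packet16h, a round trip via float. User code does not change, it just gets 32 lanes per operation. A sketch (assumes a compiler and CPU with AVX512-FP16, e.g. -mavx512fp16 on Sapphire Rapids class hardware; otherwise Eigen falls back to the earlier paths):

#include <Eigen/Core>
#include <iostream>

int main() {
  using HalfArray = Eigen::Array<Eigen::half, Eigen::Dynamic, 1>;
  HalfArray a = HalfArray::Constant(64, Eigen::half(1.5f));
  HalfArray b = HalfArray::Constant(64, Eigen::half(2.0f));
  // With Packet32h this expression needs two packets per operand, not four.
  std::cout << static_cast<float>((a * b + a).sum()) << "\n";  // 64 * 4.5 = 288
  return 0;
}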
+#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 || EIGEN_COMP_ICC >= 1900 +#define EIGEN_HAS_AVX512_MATH 1 +#else +#define EIGEN_HAS_AVX512_MATH 0 +#endif + +typedef __m512h Packet32h; +typedef eigen_packet_wrapper<__m256i, 1> Packet16h; +typedef eigen_packet_wrapper<__m128i, 2> Packet8h; + +template <> +struct is_arithmetic { + enum { value = true }; +}; + +template <> +struct packet_traits : default_packet_traits { + typedef Packet32h type; + typedef Packet16h half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 32, + HasHalfPacket = 1, + + HasCmp = 1, + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 1, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 1, + HasSetLinear = 0, + // These ones should be implemented in future + HasLog = EIGEN_HAS_AVX512_MATH, + HasLog1p = EIGEN_HAS_AVX512_MATH, + HasExp = EIGEN_HAS_AVX512_MATH, + HasExpm1 = EIGEN_HAS_AVX512_MATH, + HasSqrt = EIGEN_HAS_AVX512_MATH, + HasRsqrt = EIGEN_HAS_AVX512_MATH, + HasBessel = 0, // EIGEN_HAS_AVX512_MATH, + HasNdtri = 0, // EIGEN_HAS_AVX512_MATH, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasTanh = EIGEN_FAST_MATH, + HasErf = 0, // EIGEN_FAST_MATH, + HasBlend = 0, + HasRound = 1, + HasFloor = 1, + HasCeil = 1, + HasRint = 1 + }; +}; + +template <> +struct unpacket_traits { + typedef Eigen::half type; + typedef Packet16h half; + enum { + size = 32, + alignment = Aligned64, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +template <> +struct unpacket_traits { + typedef Eigen::half type; + typedef Packet8h half; + enum { + size = 16, + alignment = Aligned32, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +template <> +struct unpacket_traits { + typedef Eigen::half type; + typedef Packet8h half; + enum { + size = 8, + alignment = Aligned16, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +// Memory functions + +// pset1 + +template <> +EIGEN_STRONG_INLINE Packet32h pset1(const Eigen::half& from) { + return _mm512_set1_ph(static_cast<_Float16>(from)); +} + +// pset1frombits +template <> +EIGEN_STRONG_INLINE Packet32h pset1frombits(unsigned short from) { + return _mm512_castsi512_ph(_mm512_set1_epi16(from)); +} + +// pfirst + +template <> +EIGEN_STRONG_INLINE Eigen::half pfirst(const Packet32h& from) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + return half_impl::raw_uint16_to_half( + static_cast(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0))); +#else + Eigen::half dest[32]; + _mm512_storeu_ph(dest, from); + return dest[0]; +#endif +} + +// pload + +template <> +EIGEN_STRONG_INLINE Packet32h pload(const Eigen::half* from) { + EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from); +} + +// ploadu + +template <> +EIGEN_STRONG_INLINE Packet32h ploadu(const Eigen::half* from) { + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from); +} + +// pstore + +template <> +EIGEN_STRONG_INLINE void pstore(Eigen::half* to, const Packet32h& from) { + EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from); +} + +// pstoreu + +template <> +EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to, const Packet32h& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from); +} + +// ploaddup +template <> +EIGEN_STRONG_INLINE Packet32h ploaddup(const Eigen::half* from) { + __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from)); + return 
_mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, + 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), + a); +} + +// ploadquad +template <> +EIGEN_STRONG_INLINE Packet32h ploadquad(const Eigen::half* from) { + __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from)); + return _mm512_permutexvar_ph( + _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0), + a); +} + +// pabs + +template <> +EIGEN_STRONG_INLINE Packet32h pabs(const Packet32h& a) { + return _mm512_abs_ph(a); +} + +// psignbit + +template <> +EIGEN_STRONG_INLINE Packet32h psignbit(const Packet32h& a) { + return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15)); +} + +// pmin + +template <> +EIGEN_STRONG_INLINE Packet32h pmin(const Packet32h& a, const Packet32h& b) { + return _mm512_min_ph(a, b); +} + +// pmax + +template <> +EIGEN_STRONG_INLINE Packet32h pmax(const Packet32h& a, const Packet32h& b) { + return _mm512_max_ph(a, b); +} + +// plset +template <> +EIGEN_STRONG_INLINE Packet32h plset(const half& a) { + return _mm512_add_ph(_mm512_set1_ph(a), + _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f, + 19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, + 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)); +} + +// por + +template <> +EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pxor + +template <> +EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pand + +template <> +EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pandnot + +template <> +EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a))); +} + +// pselect + +template <> +EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) { + __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ); + return _mm512_mask_blend_ph(mask32, a, b); +} + +// pcmp_eq + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); +} + +// pcmp_le + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); +} + +// pcmp_lt + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); +} + +// pcmp_lt_or_nan + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 
0xffffu)); +} + +// padd + +template <> +EIGEN_STRONG_INLINE Packet32h padd(const Packet32h& a, const Packet32h& b) { + return _mm512_add_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h padd(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h padd(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// psub + +template <> +EIGEN_STRONG_INLINE Packet32h psub(const Packet32h& a, const Packet32h& b) { + return _mm512_sub_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h psub(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h psub(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pmul + +template <> +EIGEN_STRONG_INLINE Packet32h pmul(const Packet32h& a, const Packet32h& b) { + return _mm512_mul_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmul(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmul(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pdiv + +template <> +EIGEN_STRONG_INLINE Packet32h pdiv(const Packet32h& a, const Packet32h& b) { + return _mm512_div_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pdiv(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pdiv(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pround + +template <> +EIGEN_STRONG_INLINE Packet32h pround(const Packet32h& a) { + // Work-around for default std::round rounding mode. 
+ + // Mask for the sign bit + const Packet32h signMask = pset1frombits(static_cast(0x8000u)); + // The largest half-preicision float less than 0.5 + const Packet32h prev0dot5 = pset1frombits(static_cast(0x37FFu)); + + return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO); +} + +// print + +template <> +EIGEN_STRONG_INLINE Packet32h print(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION); +} + +// pceil + +template <> +EIGEN_STRONG_INLINE Packet32h pceil(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF); +} + +// pfloor + +template <> +EIGEN_STRONG_INLINE Packet32h pfloor(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF); +} + +// predux +template <> +EIGEN_STRONG_INLINE half predux(const Packet32h& a) { + return (half)_mm512_reduce_add_ph(a); +} + +template <> +EIGEN_STRONG_INLINE half predux(const Packet16h& a) { + return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a)); +} + +template <> +EIGEN_STRONG_INLINE half predux(const Packet8h& a) { + return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a)); +} + +// predux_half_dowto4 +template <> +EIGEN_STRONG_INLINE Packet16h predux_half_dowto4(const Packet32h& a) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0)); + __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1)); + + return Packet16h(padd(lowHalf, highHalf)); +#else + Eigen::half data[32]; + _mm512_storeu_ph(data, a); + + __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data)); + __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16)); + + return Packet16h(padd(lowHalf, highHalf)); +#endif +} + +// predux_max + +// predux_min + +// predux_mul + +#ifdef EIGEN_VECTORIZE_FMA + +// pmadd + +template <> +EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fmadd_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pmsub + +template <> +EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fmsub_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pnmadd + +template <> +EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fnmadd_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& 
a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pnmsub + +template <> +EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fnmsub_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +#endif + +// pnegate + +template <> +EIGEN_STRONG_INLINE Packet32h pnegate(const Packet32h& a) { + return _mm512_sub_ph(_mm512_set1_ph(0.0), a); +} + +// pconj + +template <> +EIGEN_STRONG_INLINE Packet32h pconj(const Packet32h& a) { + return a; +} + +// psqrt + +template <> +EIGEN_STRONG_INLINE Packet32h psqrt(const Packet32h& a) { + return _mm512_sqrt_ph(a); +} + +// prsqrt + +template <> +EIGEN_STRONG_INLINE Packet32h prsqrt(const Packet32h& a) { + return _mm512_rsqrt_ph(a); +} + +// preciprocal + +template <> +EIGEN_STRONG_INLINE Packet32h preciprocal(const Packet32h& a) { + return _mm512_rcp_ph(a); +} + +// ptranspose + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& a) { + __m512i t[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 16; i++) { + t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); + t[2 * i + 1] = + _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); + } + + __m512i p[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 8; i++) { + p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]); + p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]); + p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]); + p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]); + } + + __m512i q[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 4; i++) { + q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]); + q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]); + q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]); + q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]); + q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]); + q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]); + q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]); + q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]); + } + + __m512i f[32]; + +#define PACKET32H_TRANSPOSE_HELPER(X, Y) \ + do { \ + f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \ + f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \ + f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \ + f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \ + f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \ + f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \ + f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \ + f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], 
_mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \ + } while (false); + + PACKET32H_TRANSPOSE_HELPER(0, 0); + PACKET32H_TRANSPOSE_HELPER(1, 1); + PACKET32H_TRANSPOSE_HELPER(2, 2); + PACKET32H_TRANSPOSE_HELPER(3, 3); + + PACKET32H_TRANSPOSE_HELPER(1, 0); + PACKET32H_TRANSPOSE_HELPER(2, 0); + PACKET32H_TRANSPOSE_HELPER(3, 0); + PACKET32H_TRANSPOSE_HELPER(2, 1); + PACKET32H_TRANSPOSE_HELPER(3, 1); + PACKET32H_TRANSPOSE_HELPER(3, 2); + + PACKET32H_TRANSPOSE_HELPER(0, 1); + PACKET32H_TRANSPOSE_HELPER(0, 2); + PACKET32H_TRANSPOSE_HELPER(0, 3); + PACKET32H_TRANSPOSE_HELPER(1, 2); + PACKET32H_TRANSPOSE_HELPER(1, 3); + PACKET32H_TRANSPOSE_HELPER(2, 3); + +#undef PACKET32H_TRANSPOSE_HELPER + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 32; i++) { + a.packet[i] = _mm512_castsi512_ph(f[i]); + } +} + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& a) { + __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3; + t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); + t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); + t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); + t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); + + p0 = _mm512_unpacklo_epi32(t0, t2); + p1 = _mm512_unpackhi_epi32(t0, t2); + p2 = _mm512_unpacklo_epi32(t1, t3); + p3 = _mm512_unpackhi_epi32(t1, t3); + + a0 = p0; + a1 = p1; + a2 = p2; + a3 = p3; + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1); + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0); + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2); + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0); + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0); + + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2); + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1); + + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2); + + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1); + + a.packet[0] = _mm512_castsi512_ph(a0); + a.packet[1] = _mm512_castsi512_ph(a1); + a.packet[2] = _mm512_castsi512_ph(a2); + a.packet[3] = _mm512_castsi512_ph(a3); +} + +// preverse + +template <> +EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) { + return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), + a); +} + +// pscatter + +template <> +EIGEN_STRONG_INLINE void pscatter(half* to, const Packet32h& from, Index stride) { + EIGEN_ALIGN64 half aux[32]; + pstore(aux, from); + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 32; i++) { + to[stride * i] = aux[i]; + } +} + +// pgather + +template <> +EIGEN_STRONG_INLINE Packet32h pgather(const Eigen::half* from, Index stride) { + return _mm512_castsi512_ph(_mm512_set_epi16( + from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x, + from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x, + from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x, + from[16 * stride].x, from[15 * stride].x, from[14 
* stride].x, from[13 * stride].x, from[12 * stride].x, + from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x, + from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x, + from[1 * stride].x, from[0 * stride].x)); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pcos(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h psin(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog2(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog1p(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pexp(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pexpm1(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h ptanh(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h&, Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pldexp(const Packet16h&, const Packet16h&); + +EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) { + __m512d result = _mm512_undefined_pd(); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1); + return _mm512_castpd_ph(result); +} + +EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) { + a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0)); + b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1)); +} + +// psin +template <> +EIGEN_STRONG_INLINE Packet32h psin(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = psin(low); + Packet16h highOut = psin(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pcos +template <> +EIGEN_STRONG_INLINE Packet32h pcos(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pcos(low); + Packet16h highOut = pcos(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog +template <> +EIGEN_STRONG_INLINE Packet32h plog(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog(low); + Packet16h highOut = plog(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog2 +template <> +EIGEN_STRONG_INLINE Packet32h plog2(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog2(low); + Packet16h highOut = plog2(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog1p +template <> +EIGEN_STRONG_INLINE Packet32h plog1p(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog1p(low); + Packet16h highOut = plog1p(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pexp +template <> +EIGEN_STRONG_INLINE Packet32h pexp(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pexp(low); + Packet16h highOut = pexp(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pexpm1 +template <> +EIGEN_STRONG_INLINE Packet32h pexpm1(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pexpm1(low); + Packet16h highOut = pexpm1(high); + + return combine2Packet16h(lowOut, highOut); +} + +// ptanh +template <> +EIGEN_STRONG_INLINE Packet32h 
ptanh(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = ptanh(low); + Packet16h highOut = ptanh(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pfrexp +template <> +EIGEN_STRONG_INLINE Packet32h pfrexp(const Packet32h& a, Packet32h& exponent) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h exp1 = _mm256_undefined_si256(); + Packet16h exp2 = _mm256_undefined_si256(); + + Packet16h lowOut = pfrexp(low, exp1); + Packet16h highOut = pfrexp(high, exp2); + + exponent = combine2Packet16h(exp1, exp2); + + return combine2Packet16h(lowOut, highOut); +} + +// pldexp +template <> +EIGEN_STRONG_INLINE Packet32h pldexp(const Packet32h& a, const Packet32h& exponent) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h exp1; + Packet16h exp2; + extract2Packet16h(exponent, exp1, exp2); + + Packet16h lowOut = pldexp(low, exp1); + Packet16h highOut = pldexp(high, exp2); + + return combine2Packet16h(lowOut, highOut); +} + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_FP16_AVX512_H \ No newline at end of file diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h b/libs/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h new file mode 100644 index 0000000..edd6ef3 --- /dev/null +++ b/libs/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h @@ -0,0 +1,1185 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2022 Intel Corporation +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CORE_ARCH_AVX512_TRSM_KERNEL_H +#define EIGEN_CORE_ARCH_AVX512_TRSM_KERNEL_H + +#include "../../InternalHeaderCheck.h" + +#if !defined(EIGEN_USE_AVX512_TRSM_KERNELS) +#define EIGEN_USE_AVX512_TRSM_KERNELS 1 +#endif + +#if EIGEN_USE_AVX512_TRSM_KERNELS +#if !defined(EIGEN_USE_AVX512_TRSM_R_KERNELS) +#define EIGEN_USE_AVX512_TRSM_R_KERNELS 1 +#endif +#if !defined(EIGEN_USE_AVX512_TRSM_L_KERNELS) +#define EIGEN_USE_AVX512_TRSM_L_KERNELS 1 +#endif +#else // EIGEN_USE_AVX512_TRSM_KERNELS == 0 +#define EIGEN_USE_AVX512_TRSM_R_KERNELS 0 +#define EIGEN_USE_AVX512_TRSM_L_KERNELS 0 +#endif + +// Need this for some std::min calls. +#ifdef min +#undef min +#endif + +namespace Eigen { +namespace internal { + +#define EIGEN_AVX_MAX_NUM_ACC (int64_t(24)) +#define EIGEN_AVX_MAX_NUM_ROW (int64_t(8)) // Denoted L in code. +#define EIGEN_AVX_MAX_K_UNROL (int64_t(4)) +#define EIGEN_AVX_B_LOAD_SETS (int64_t(2)) +#define EIGEN_AVX_MAX_A_BCAST (int64_t(2)) +typedef Packet16f vecFullFloat; +typedef Packet8d vecFullDouble; +typedef Packet8f vecHalfFloat; +typedef Packet4d vecHalfDouble; + +// Compile-time unrolls are implemented here. +// Note: this depends on macros and typedefs above. +#include "TrsmUnrolls.inc" + +#if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0) +/** + * For smaller problem sizes, and certain compilers, using the optimized kernels trsmKernelL/R directly + * is faster than the packed versions in TriangularSolverMatrix.h. + * + * The current heuristic is based on having having all arrays used in the largest gemm-update + * in triSolve fit in roughly L2Cap (percentage) of the L2 cache. These cutoffs are a bit conservative and could be + * larger for some trsm cases. 
+ * The formula: + * + * (L*M + M*N + L*N)*sizeof(Scalar) < L2Cache*L2Cap + * + * L = number of rows to solve at a time + * N = number of rhs + * M = Dimension of triangular matrix + * + */ +#if !defined(EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS) +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS 1 +#endif + +#if EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS + +#if EIGEN_USE_AVX512_TRSM_R_KERNELS +#if !defined(EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS) +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS 1 +#endif // !defined(EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS) +#endif + +#if EIGEN_USE_AVX512_TRSM_L_KERNELS +#if !defined(EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS) +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS 1 +#endif +#endif // EIGEN_USE_AVX512_TRSM_L_KERNELS + +#else // EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS == 0 +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS 0 +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS 0 +#endif // EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS + +template +int64_t avx512_trsm_cutoff(int64_t L2Size, int64_t N, double L2Cap) { + const int64_t U3 = 3 * packet_traits::size; + const int64_t MaxNb = 5 * U3; + int64_t Nb = std::min(MaxNb, N); + double cutoff_d = + (((L2Size * L2Cap) / (sizeof(Scalar))) - (EIGEN_AVX_MAX_NUM_ROW)*Nb) / ((EIGEN_AVX_MAX_NUM_ROW) + Nb); + int64_t cutoff_l = static_cast(cutoff_d); + return (cutoff_l / EIGEN_AVX_MAX_NUM_ROW) * EIGEN_AVX_MAX_NUM_ROW; +} +#else // !(EIGEN_USE_AVX512_TRSM_KERNELS) || !(EIGEN_COMP_CLANG != 0) +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS 0 +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS 0 +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS 0 +#endif + +/** + * Used by gemmKernel for the case A/B row-major and C col-major. + */ +template +static EIGEN_ALWAYS_INLINE void transStoreC(PacketBlock &zmm, + Scalar *C_arr, int64_t LDC, int64_t remM_ = 0, int64_t remN_ = 0) { + EIGEN_UNUSED_VARIABLE(remN_); + EIGEN_UNUSED_VARIABLE(remM_); + using urolls = unrolls::trans; + + constexpr int64_t U3 = urolls::PacketSize * 3; + constexpr int64_t U2 = urolls::PacketSize * 2; + constexpr int64_t U1 = urolls::PacketSize * 1; + + static_assert(unrollN == U1 || unrollN == U2 || unrollN == U3, "unrollN should be a multiple of PacketSize"); + static_assert(unrollM == EIGEN_AVX_MAX_NUM_ROW, "unrollM should be equal to EIGEN_AVX_MAX_NUM_ROW"); + + urolls::template transpose(zmm); + EIGEN_IF_CONSTEXPR(unrollN > U2) urolls::template transpose(zmm); + EIGEN_IF_CONSTEXPR(unrollN > U1) urolls::template transpose(zmm); + + static_assert((remN && unrollN == U1) || !remN, "When handling N remainder set unrollN=U1"); + EIGEN_IF_CONSTEXPR(!remN) { + urolls::template storeC(C_arr, LDC, zmm, remM_); + EIGEN_IF_CONSTEXPR(unrollN > U1) { + constexpr int64_t unrollN_ = std::min(unrollN - U1, U1); + urolls::template storeC(C_arr + U1 * LDC, LDC, zmm, remM_); + } + EIGEN_IF_CONSTEXPR(unrollN > U2) { + constexpr int64_t unrollN_ = std::min(unrollN - U2, U1); + urolls::template storeC(C_arr + U2 * LDC, LDC, zmm, remM_); + } + } + else { + EIGEN_IF_CONSTEXPR((std::is_same::value)) { + // Note: without "if constexpr" this section of code will also be + // parsed by the compiler so each of the storeC will still be instantiated. + // We use enable_if in aux_storeC to set it to an empty function for + // these cases. 
+ if (remN_ == 15) + urolls::template storeC<15, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 14) + urolls::template storeC<14, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 13) + urolls::template storeC<13, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 12) + urolls::template storeC<12, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 11) + urolls::template storeC<11, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 10) + urolls::template storeC<10, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 9) + urolls::template storeC<9, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 8) + urolls::template storeC<8, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 7) + urolls::template storeC<7, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 6) + urolls::template storeC<6, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 5) + urolls::template storeC<5, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 4) + urolls::template storeC<4, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 3) + urolls::template storeC<3, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 2) + urolls::template storeC<2, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 1) + urolls::template storeC<1, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + } + else { + if (remN_ == 7) + urolls::template storeC<7, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 6) + urolls::template storeC<6, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 5) + urolls::template storeC<5, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 4) + urolls::template storeC<4, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 3) + urolls::template storeC<3, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 2) + urolls::template storeC<2, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + else if (remN_ == 1) + urolls::template storeC<1, unrollN, 0, remM>(C_arr, LDC, zmm, remM_); + } + } +} + +/** + * GEMM like operation for trsm panel updates. + * Computes: C -= A*B + * K must be multipe of 4. + * + * Unrolls used are {1,2,4,8}x{U1,U2,U3}; + * For good performance we want K to be large with M/N relatively small, but also large enough + * to use the {8,U3} unroll block. + * + * isARowMajor: is A_arr row-major? + * isCRowMajor: is C_arr row-major? (B_arr is assumed to be row-major). + * isAdd: C += A*B or C -= A*B (used by trsm) + * handleKRem: Handle arbitrary K? This is not needed for trsm. 
+ */ +template +void gemmKernel(Scalar *A_arr, Scalar *B_arr, Scalar *C_arr, int64_t M, int64_t N, int64_t K, int64_t LDA, int64_t LDB, + int64_t LDC) { + using urolls = unrolls::gemm; + constexpr int64_t U3 = urolls::PacketSize * 3; + constexpr int64_t U2 = urolls::PacketSize * 2; + constexpr int64_t U1 = urolls::PacketSize * 1; + using vec = typename std::conditional::value, vecFullFloat, vecFullDouble>::type; + int64_t N_ = (N / U3) * U3; + int64_t M_ = (M / EIGEN_AVX_MAX_NUM_ROW) * EIGEN_AVX_MAX_NUM_ROW; + int64_t K_ = (K / EIGEN_AVX_MAX_K_UNROL) * EIGEN_AVX_MAX_K_UNROL; + int64_t j = 0; + for (; j < N_; j += U3) { + constexpr int64_t EIGEN_AVX_MAX_B_LOAD = EIGEN_AVX_B_LOAD_SETS * 3; + int64_t i = 0; + for (; i < M_; i += EIGEN_AVX_MAX_NUM_ROW) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)], *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<3, EIGEN_AVX_MAX_NUM_ROW>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<3, EIGEN_AVX_MAX_NUM_ROW>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<3, EIGEN_AVX_MAX_NUM_ROW>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC); + } + } + if (M - i >= 4) { // Note: this block assumes EIGEN_AVX_MAX_NUM_ROW = 8. Should be removed otherwise + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<3, 4>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel( + B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<3, 4>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<3, 4>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 4); + } + i += 4; + } + if (M - i >= 2) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<3, 2>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel( + B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<3, 2>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<3, 2>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 2); + } + i += 2; + } + if (M - i > 0) { + Scalar *A_t = &A_arr[idA(i, 0, 
LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<3, 1>(zmm); + { + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel( + B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<3, 1>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<3, 1>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 1); + } + } + } + } + if (N - j >= U2) { + constexpr int64_t EIGEN_AVX_MAX_B_LOAD = EIGEN_AVX_B_LOAD_SETS * 2; + int64_t i = 0; + for (; i < M_; i += EIGEN_AVX_MAX_NUM_ROW) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)], *B_t = &B_arr[0 * LDB + j]; + EIGEN_IF_CONSTEXPR(isCRowMajor) B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<2, EIGEN_AVX_MAX_NUM_ROW>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<2, EIGEN_AVX_MAX_NUM_ROW>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<2, EIGEN_AVX_MAX_NUM_ROW>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC); + } + } + if (M - i >= 4) { // Note: this block assumes EIGEN_AVX_MAX_NUM_ROW = 8. 
Should be removed otherwise + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<2, 4>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, + LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<2, 4>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<2, 4>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 4); + } + i += 4; + } + if (M - i >= 2) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<2, 2>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, + LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<2, 2>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<2, 2>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 2); + } + i += 2; + } + if (M - i > 0) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<2, 1>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, + LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<2, 1>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<2, 1>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 1); + } + } + j += U2; + } + if (N - j >= U1) { + constexpr int64_t EIGEN_AVX_MAX_B_LOAD = EIGEN_AVX_B_LOAD_SETS * 1; + int64_t i = 0; + for (; i < M_; i += EIGEN_AVX_MAX_NUM_ROW) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)], *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, EIGEN_AVX_MAX_NUM_ROW>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, EIGEN_AVX_MAX_NUM_ROW>(&C_arr[i * LDC + j], LDC, zmm); + 
urolls::template storeC<1, EIGEN_AVX_MAX_NUM_ROW>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC); + } + } + if (M - i >= 4) { // Note: this block assumes EIGEN_AVX_MAX_NUM_ROW = 8. Should be removed otherwise + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, 4>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, + LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, 4>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<1, 4>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 4); + } + i += 4; + } + if (M - i >= 2) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, 2>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, + LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, 2>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<1, 2>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 2); + } + i += 2; + } + if (M - i > 0) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, 1>(zmm); + { + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, + LDA, zmm); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, 1>(&C_arr[i * LDC + j], LDC, zmm); + urolls::template storeC<1, 1>(&C_arr[i * LDC + j], LDC, zmm); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 1); + } + } + } + j += U1; + } + if (N - j > 0) { + constexpr int64_t EIGEN_AVX_MAX_B_LOAD = EIGEN_AVX_B_LOAD_SETS * 1; + int64_t i = 0; + for (; i < M_; i += EIGEN_AVX_MAX_NUM_ROW) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, EIGEN_AVX_MAX_NUM_ROW>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm, N - j); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template 
microKernel(B_t, A_t, LDB, LDA, zmm, N - j); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, EIGEN_AVX_MAX_NUM_ROW, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + urolls::template storeC<1, EIGEN_AVX_MAX_NUM_ROW, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 0, N - j); + } + } + if (M - i >= 4) { // Note: this block assumes EIGEN_AVX_MAX_NUM_ROW = 8. Should be removed otherwise + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, 4>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm, N - j); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel( + B_t, A_t, LDB, LDA, zmm, N - j); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, 4, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + urolls::template storeC<1, 4, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 4, N - j); + } + i += 4; + } + if (M - i >= 2) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, 2>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm, N - j); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel( + B_t, A_t, LDB, LDA, zmm, N - j); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, 2, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + urolls::template storeC<1, 2, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 2, N - j); + } + i += 2; + } + if (M - i > 0) { + Scalar *A_t = &A_arr[idA(i, 0, LDA)]; + Scalar *B_t = &B_arr[0 * LDB + j]; + PacketBlock zmm; + urolls::template setzero<1, 1>(zmm); + for (int64_t k = 0; k < K_; k += EIGEN_AVX_MAX_K_UNROL) { + urolls::template microKernel( + B_t, A_t, LDB, LDA, zmm, N - j); + B_t += EIGEN_AVX_MAX_K_UNROL * LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t += EIGEN_AVX_MAX_K_UNROL; + else A_t += EIGEN_AVX_MAX_K_UNROL * LDA; + } + EIGEN_IF_CONSTEXPR(handleKRem) { + for (int64_t k = K_; k < K; k++) { + urolls::template microKernel(B_t, A_t, LDB, LDA, zmm, + N - j); + B_t += LDB; + EIGEN_IF_CONSTEXPR(isARowMajor) A_t++; + else A_t += LDA; + } + } + EIGEN_IF_CONSTEXPR(isCRowMajor) { + urolls::template updateC<1, 1, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + urolls::template storeC<1, 1, true>(&C_arr[i * LDC + j], LDC, zmm, N - j); + } + else { + transStoreC(zmm, &C_arr[i + j * LDC], LDC, 1, N - j); + } + } + } +} + +/** + * Triangular solve kernel with A on left with K number of rhs. dim(A) = unrollM + * + * unrollM: dimension of A matrix (triangular matrix). unrollM should be <= EIGEN_AVX_MAX_NUM_ROW + * isFWDSolve: is forward solve? 
+ * isUnitDiag: is the diagonal of A all ones? + * The B matrix (RHS) is assumed to be row-major + */ +template +static EIGEN_ALWAYS_INLINE void triSolveKernel(Scalar *A_arr, Scalar *B_arr, int64_t K, int64_t LDA, int64_t LDB) { + static_assert(unrollM <= EIGEN_AVX_MAX_NUM_ROW, "unrollM should be equal to EIGEN_AVX_MAX_NUM_ROW"); + using urolls = unrolls::trsm; + constexpr int64_t U3 = urolls::PacketSize * 3; + constexpr int64_t U2 = urolls::PacketSize * 2; + constexpr int64_t U1 = urolls::PacketSize * 1; + + PacketBlock RHSInPacket; + PacketBlock AInPacket; + + int64_t k = 0; + while (K - k >= U3) { + urolls::template loadRHS(B_arr + k, LDB, RHSInPacket); + urolls::template triSolveMicroKernel(A_arr, LDA, RHSInPacket, + AInPacket); + urolls::template storeRHS(B_arr + k, LDB, RHSInPacket); + k += U3; + } + if (K - k >= U2) { + urolls::template loadRHS(B_arr + k, LDB, RHSInPacket); + urolls::template triSolveMicroKernel(A_arr, LDA, RHSInPacket, + AInPacket); + urolls::template storeRHS(B_arr + k, LDB, RHSInPacket); + k += U2; + } + if (K - k >= U1) { + urolls::template loadRHS(B_arr + k, LDB, RHSInPacket); + urolls::template triSolveMicroKernel(A_arr, LDA, RHSInPacket, + AInPacket); + urolls::template storeRHS(B_arr + k, LDB, RHSInPacket); + k += U1; + } + if (K - k > 0) { + // Handle remaining number of RHS + urolls::template loadRHS(B_arr + k, LDB, RHSInPacket, K - k); + urolls::template triSolveMicroKernel(A_arr, LDA, RHSInPacket, + AInPacket); + urolls::template storeRHS(B_arr + k, LDB, RHSInPacket, K - k); + } +} + +/** + * Triangular solve routine with A on left and dimension of at most L with K number of rhs. This is essentially + * a wrapper for triSolveMicrokernel for M = {1,2,3,4,5,6,7,8}. + * + * isFWDSolve: is forward solve? + * isUnitDiag: is the diagonal of A all ones? + * The B matrix (RHS) is assumed to be row-major + */ +template +void triSolveKernelLxK(Scalar *A_arr, Scalar *B_arr, int64_t M, int64_t K, int64_t LDA, int64_t LDB) { + // Note: this assumes EIGEN_AVX_MAX_NUM_ROW = 8. Unrolls should be adjusted + // accordingly if EIGEN_AVX_MAX_NUM_ROW is smaller. + using vec = typename std::conditional::value, vecFullFloat, vecFullDouble>::type; + if (M == 8) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + else if (M == 7) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + else if (M == 6) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + else if (M == 5) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + else if (M == 4) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + else if (M == 3) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + else if (M == 2) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + else if (M == 1) + triSolveKernel(A_arr, B_arr, K, LDA, LDB); + return; +} + +/** + * This routine is used to copy B to/from a temporary array (row-major) for cases where B is column-major. 
+ * + * toTemp: true => copy to temporary array, false => copy from temporary array + * remM: true = need to handle remainder values for M (M < EIGEN_AVX_MAX_NUM_ROW) + * + */ +template +static EIGEN_ALWAYS_INLINE void copyBToRowMajor(Scalar *B_arr, int64_t LDB, int64_t K, Scalar *B_temp, int64_t LDB_, + int64_t remM_ = 0) { + EIGEN_UNUSED_VARIABLE(remM_); + using urolls = unrolls::transB; + using vecHalf = typename std::conditional::value, vecHalfFloat, vecFullDouble>::type; + PacketBlock ymm; + constexpr int64_t U3 = urolls::PacketSize * 3; + constexpr int64_t U2 = urolls::PacketSize * 2; + constexpr int64_t U1 = urolls::PacketSize * 1; + int64_t K_ = K / U3 * U3; + int64_t k = 0; + + for (; k < K_; k += U3) { + urolls::template transB_kernel(B_arr + k * LDB, LDB, B_temp, LDB_, ymm, remM_); + B_temp += U3; + } + if (K - k >= U2) { + urolls::template transB_kernel(B_arr + k * LDB, LDB, B_temp, LDB_, ymm, remM_); + B_temp += U2; + k += U2; + } + if (K - k >= U1) { + urolls::template transB_kernel(B_arr + k * LDB, LDB, B_temp, LDB_, ymm, remM_); + B_temp += U1; + k += U1; + } + EIGEN_IF_CONSTEXPR(U1 > 8) { + // Note: without "if constexpr" this section of code will also be + // parsed by the compiler so there is an additional check in {load/store}BBlock + // to make sure the counter is not non-negative. + if (K - k >= 8) { + urolls::template transB_kernel<8, toTemp, remM>(B_arr + k * LDB, LDB, B_temp, LDB_, ymm, remM_); + B_temp += 8; + k += 8; + } + } + EIGEN_IF_CONSTEXPR(U1 > 4) { + // Note: without "if constexpr" this section of code will also be + // parsed by the compiler so there is an additional check in {load/store}BBlock + // to make sure the counter is not non-negative. + if (K - k >= 4) { + urolls::template transB_kernel<4, toTemp, remM>(B_arr + k * LDB, LDB, B_temp, LDB_, ymm, remM_); + B_temp += 4; + k += 4; + } + } + if (K - k >= 2) { + urolls::template transB_kernel<2, toTemp, remM>(B_arr + k * LDB, LDB, B_temp, LDB_, ymm, remM_); + B_temp += 2; + k += 2; + } + if (K - k >= 1) { + urolls::template transB_kernel<1, toTemp, remM>(B_arr + k * LDB, LDB, B_temp, LDB_, ymm, remM_); + B_temp += 1; + k += 1; + } +} + +#if (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) +/** + * Reduce blocking sizes so that the size of the temporary workspace needed is less than "limit" bytes, + * - kB must be at least psize + * - numM must be at least EIGEN_AVX_MAX_NUM_ROW + */ +template +constexpr std::pair trsmBlocking(const int64_t limit) { + constexpr int64_t psize = packet_traits::size; + int64_t kB = 15 * psize; + int64_t numM = 8 * EIGEN_AVX_MAX_NUM_ROW; + // If B is rowmajor, no temp workspace needed, so use default blocking sizes. + if (isBRowMajor) return {kB, numM}; + + // Very simple heuristic, prefer keeping kB as large as possible to fully use vector registers. + for (int64_t k = kB; k > psize; k -= psize) { + for (int64_t m = numM; m > EIGEN_AVX_MAX_NUM_ROW; m -= EIGEN_AVX_MAX_NUM_ROW) { + if ((((k + psize - 1) / psize + 4) * psize) * m * sizeof(Scalar) < limit) { + return {k, m}; + } + } + } + return {psize, EIGEN_AVX_MAX_NUM_ROW}; // Minimum blocking size required +} +#endif // (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) + +/** + * Main triangular solve driver + * + * Triangular solve with A on the left. + * Scalar: Scalar precision, only float/double is supported. + * isARowMajor: is A row-major? + * isBRowMajor: is B row-major? + * isFWDSolve: is this forward solve or backward (true => forward)? 
+ * isUnitDiag: is diagonal of A unit or nonunit (true => A has unit diagonal)? + * + * M: dimension of A + * numRHS: number of right hand sides (coincides with K dimension for gemm updates) + * + * Here are the mapping between the different TRSM cases (col-major) and triSolve: + * + * LLN (left , lower, A non-transposed) :: isARowMajor=false, isBRowMajor=false, isFWDSolve=true + * LUT (left , upper, A transposed) :: isARowMajor=true, isBRowMajor=false, isFWDSolve=true + * LUN (left , upper, A non-transposed) :: isARowMajor=false, isBRowMajor=false, isFWDSolve=false + * LLT (left , lower, A transposed) :: isARowMajor=true, isBRowMajor=false, isFWDSolve=false + * RUN (right, upper, A non-transposed) :: isARowMajor=true, isBRowMajor=true, isFWDSolve=true + * RLT (right, lower, A transposed) :: isARowMajor=false, isBRowMajor=true, isFWDSolve=true + * RUT (right, upper, A transposed) :: isARowMajor=false, isBRowMajor=true, isFWDSolve=false + * RLN (right, lower, A non-transposed) :: isARowMajor=true, isBRowMajor=true, isFWDSolve=false + * + * Note: For RXX cases M,numRHS should be swapped. + * + */ +template +void triSolve(Scalar *A_arr, Scalar *B_arr, int64_t M, int64_t numRHS, int64_t LDA, int64_t LDB) { + constexpr int64_t psize = packet_traits::size; + /** + * The values for kB, numM were determined experimentally. + * kB: Number of RHS we process at a time. + * numM: number of rows of B we will store in a temporary array (see below.) This should be a multiple of L. + * + * kB was determined by initially setting kB = numRHS and benchmarking triSolve (TRSM-RUN case) + * performance with M=numRHS. + * It was observed that performance started to drop around M=numRHS=240. This is likely machine dependent. + * + * numM was chosen "arbitrarily". It should be relatively small so B_temp is not too large, but it should be + * large enough to allow GEMM updates to have larger "K"s (see below.) No benchmarking has been done so far to + * determine optimal values for numM. + */ +#if (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) + /** + * If EIGEN_NO_MALLOC is requested, we try to reduce kB and numM so the maximum temp workspace required is less + * than EIGEN_STACK_ALLOCATION_LIMIT. Actual workspace size may be less, depending on the number of vectors to + * solve. + * - kB must be at least psize + * - numM must be at least EIGEN_AVX_MAX_NUM_ROW + * + * If B is row-major, the blocking sizes are not reduced (no temp workspace needed). + */ + constexpr std::pair blocking_ = trsmBlocking(EIGEN_STACK_ALLOCATION_LIMIT); + constexpr int64_t kB = blocking_.first; + constexpr int64_t numM = blocking_.second; + /** + * If the temp workspace size exceeds EIGEN_STACK_ALLOCATION_LIMIT even with the minimum blocking sizes, + * we throw an assertion. Use -DEIGEN_USE_AVX512_TRSM_L_KERNELS=0 if necessary + */ + static_assert(!(((((kB + psize - 1) / psize + 4) * psize) * numM * sizeof(Scalar) >= EIGEN_STACK_ALLOCATION_LIMIT) && + !isBRowMajor), + "Temp workspace required is too large."); +#else + constexpr int64_t kB = (3 * psize) * 5; // 5*U3 + constexpr int64_t numM = 8 * EIGEN_AVX_MAX_NUM_ROW; +#endif + + int64_t sizeBTemp = 0; + Scalar *B_temp = NULL; + EIGEN_IF_CONSTEXPR(!isBRowMajor) { + /** + * If B is col-major, we copy it to a fixed-size temporary array of size at most ~numM*kB and + * transpose it to row-major. Call the solve routine, and copy+transpose it back to the original array. + * The updated row-major copy of B is reused in the GEMM updates. 
+ */ + sizeBTemp = (((std::min(kB, numRHS) + psize - 1) / psize + 4) * psize) * numM; + } + +#if !defined(EIGEN_NO_MALLOC) + EIGEN_IF_CONSTEXPR(!isBRowMajor) B_temp = (Scalar *)handmade_aligned_malloc(sizeof(Scalar) * sizeBTemp, 64); +#elif (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) + // Use alloca if malloc not allowed, requested temp workspace size should be less than EIGEN_STACK_ALLOCATION_LIMIT + ei_declare_aligned_stack_constructed_variable(Scalar, B_temp_alloca, sizeBTemp, 0); + B_temp = B_temp_alloca; +#endif + + for (int64_t k = 0; k < numRHS; k += kB) { + int64_t bK = numRHS - k > kB ? kB : numRHS - k; + int64_t M_ = (M / EIGEN_AVX_MAX_NUM_ROW) * EIGEN_AVX_MAX_NUM_ROW, gemmOff = 0; + + // bK rounded up to next multiple of L=EIGEN_AVX_MAX_NUM_ROW. When B_temp is used, we solve for bkL RHS + // instead of bK RHS in triSolveKernelLxK. + int64_t bkL = ((bK + (EIGEN_AVX_MAX_NUM_ROW - 1)) / EIGEN_AVX_MAX_NUM_ROW) * EIGEN_AVX_MAX_NUM_ROW; + const int64_t numScalarPerCache = 64 / sizeof(Scalar); + // Leading dimension of B_temp, will be a multiple of the cache line size. + int64_t LDT = ((bkL + (numScalarPerCache - 1)) / numScalarPerCache) * numScalarPerCache; + int64_t offsetBTemp = 0; + for (int64_t i = 0; i < M_; i += EIGEN_AVX_MAX_NUM_ROW) { + EIGEN_IF_CONSTEXPR(!isBRowMajor) { + int64_t indA_i = isFWDSolve ? i : M - 1 - i; + int64_t indB_i = isFWDSolve ? i : M - (i + EIGEN_AVX_MAX_NUM_ROW); + int64_t offB_1 = isFWDSolve ? offsetBTemp : sizeBTemp - EIGEN_AVX_MAX_NUM_ROW * LDT - offsetBTemp; + int64_t offB_2 = isFWDSolve ? offsetBTemp : sizeBTemp - LDT - offsetBTemp; + // Copy values from B to B_temp. + copyBToRowMajor(B_arr + indB_i + k * LDB, LDB, bK, B_temp + offB_1, LDT); + // Triangular solve with a small block of A and long horizontal blocks of B (or B_temp if B col-major) + triSolveKernelLxK( + &A_arr[idA(indA_i, indA_i, LDA)], B_temp + offB_2, EIGEN_AVX_MAX_NUM_ROW, bkL, LDA, LDT); + // Copy values from B_temp back to B. B_temp will be reused in gemm call below. + copyBToRowMajor(B_arr + indB_i + k * LDB, LDB, bK, B_temp + offB_1, LDT); + + offsetBTemp += EIGEN_AVX_MAX_NUM_ROW * LDT; + } + else { + int64_t ind = isFWDSolve ? i : M - 1 - i; + triSolveKernelLxK( + &A_arr[idA(ind, ind, LDA)], B_arr + k + ind * LDB, EIGEN_AVX_MAX_NUM_ROW, bK, LDA, LDB); + } + if (i + EIGEN_AVX_MAX_NUM_ROW < M_) { + /** + * For the GEMM updates, we want "K" (K=i+8 in this case) to be large as soon as possible + * to reuse the accumulators in GEMM as much as possible. So we only update 8xbK blocks of + * B as follows: + * + * A B + * __ + * |__|__ |__| + * |__|__|__ |__| + * |__|__|__|__ |__| + * |********|__| |**| + */ + EIGEN_IF_CONSTEXPR(isBRowMajor) { + int64_t indA_i = isFWDSolve ? i + EIGEN_AVX_MAX_NUM_ROW : M - (i + 2 * EIGEN_AVX_MAX_NUM_ROW); + int64_t indA_j = isFWDSolve ? 0 : M - (i + EIGEN_AVX_MAX_NUM_ROW); + int64_t indB_i = isFWDSolve ? 0 : M - (i + EIGEN_AVX_MAX_NUM_ROW); + int64_t indB_i2 = isFWDSolve ? i + EIGEN_AVX_MAX_NUM_ROW : M - (i + 2 * EIGEN_AVX_MAX_NUM_ROW); + gemmKernel( + &A_arr[idA(indA_i, indA_j, LDA)], B_arr + k + indB_i * LDB, B_arr + k + indB_i2 * LDB, + EIGEN_AVX_MAX_NUM_ROW, bK, i + EIGEN_AVX_MAX_NUM_ROW, LDA, LDB, LDB); + } + else { + if (offsetBTemp + EIGEN_AVX_MAX_NUM_ROW * LDT > sizeBTemp) { + /** + * Similar idea as mentioned above, but here we are limited by the number of updated values of B + * that can be stored (row-major) in B_temp. 
+           *
+           * If there is not enough space to store the next batch of 8xbK of B in B_temp, we call the GEMM
+           * update and partially update the remaining old values of B which depend on the new values
+           * of B stored in B_temp. These values are then no longer needed and can be overwritten.
+           */
+          int64_t indA_i = isFWDSolve ? i + EIGEN_AVX_MAX_NUM_ROW : 0;
+          int64_t indA_j = isFWDSolve ? gemmOff : M - (i + EIGEN_AVX_MAX_NUM_ROW);
+          int64_t indB_i = isFWDSolve ? i + EIGEN_AVX_MAX_NUM_ROW : 0;
+          int64_t offB_1 = isFWDSolve ? 0 : sizeBTemp - offsetBTemp;
+          gemmKernel(
+              &A_arr[idA(indA_i, indA_j, LDA)], B_temp + offB_1, B_arr + indB_i + (k)*LDB,
+              M - (i + EIGEN_AVX_MAX_NUM_ROW), bK, i + EIGEN_AVX_MAX_NUM_ROW - gemmOff, LDA, LDT, LDB);
+          offsetBTemp = 0;
+          gemmOff = i + EIGEN_AVX_MAX_NUM_ROW;
+        } else {
+          /**
+           * If there is enough space in B_temp, we only update the next 8xbK values of B.
+           */
+          int64_t indA_i = isFWDSolve ? i + EIGEN_AVX_MAX_NUM_ROW : M - (i + 2 * EIGEN_AVX_MAX_NUM_ROW);
+          int64_t indA_j = isFWDSolve ? gemmOff : M - (i + EIGEN_AVX_MAX_NUM_ROW);
+          int64_t indB_i = isFWDSolve ? i + EIGEN_AVX_MAX_NUM_ROW : M - (i + 2 * EIGEN_AVX_MAX_NUM_ROW);
+          int64_t offB_1 = isFWDSolve ? 0 : sizeBTemp - offsetBTemp;
+          gemmKernel(
+              &A_arr[idA(indA_i, indA_j, LDA)], B_temp + offB_1, B_arr + indB_i + (k)*LDB,
+              EIGEN_AVX_MAX_NUM_ROW, bK, i + EIGEN_AVX_MAX_NUM_ROW - gemmOff, LDA, LDT, LDB);
+        }
+      }
+    }
+    // Handle M remainder.
+    int64_t bM = M - M_;
+    if (bM > 0) {
+      if (M_ > 0) {
+        EIGEN_IF_CONSTEXPR(isBRowMajor) {
+          int64_t indA_i = isFWDSolve ? M_ : 0;
+          int64_t indA_j = isFWDSolve ? 0 : bM;
+          int64_t indB_i = isFWDSolve ? 0 : bM;
+          int64_t indB_i2 = isFWDSolve ? M_ : 0;
+          gemmKernel(
+              &A_arr[idA(indA_i, indA_j, LDA)], B_arr + k + indB_i * LDB, B_arr + k + indB_i2 * LDB, bM,
+              bK, M_, LDA, LDB, LDB);
+        }
+        else {
+          int64_t indA_i = isFWDSolve ? M_ : 0;
+          int64_t indA_j = isFWDSolve ? gemmOff : bM;
+          int64_t indB_i = isFWDSolve ? M_ : 0;
+          int64_t offB_1 = isFWDSolve ? 0 : sizeBTemp - offsetBTemp;
+          gemmKernel(&A_arr[idA(indA_i, indA_j, LDA)],
+                     B_temp + offB_1, B_arr + indB_i + (k)*LDB, bM, bK,
+                     M_ - gemmOff, LDA, LDT, LDB);
+        }
+      }
+      EIGEN_IF_CONSTEXPR(!isBRowMajor) {
+        int64_t indA_i = isFWDSolve ? M_ : M - 1 - M_;
+        int64_t indB_i = isFWDSolve ? M_ : 0;
+        int64_t offB_1 = isFWDSolve ? 0 : (bM - 1) * bkL;
+        copyBToRowMajor(B_arr + indB_i + k * LDB, LDB, bK, B_temp, bkL, bM);
+        triSolveKernelLxK(&A_arr[idA(indA_i, indA_i, LDA)],
+                          B_temp + offB_1, bM, bkL, LDA, bkL);
+        copyBToRowMajor(B_arr + indB_i + k * LDB, LDB, bK, B_temp, bkL, bM);
+      }
+      else {
+        int64_t ind = isFWDSolve ? M_ : M - 1 - M_;
+        triSolveKernelLxK(&A_arr[idA(ind, ind, LDA)],
+                          B_arr + k + ind * LDB, bM, bK, LDA, LDB);
+      }
+    }
+  }
+
+#if !defined(EIGEN_NO_MALLOC)
+  EIGEN_IF_CONSTEXPR(!isBRowMajor) handmade_aligned_free(B_temp);
+#endif
+}
+
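For orientation, the eight-case table in the triSolve comment above can be checked mechanically. Below is a minimal, self-contained sketch of that mapping; `TriSolveFlags`, `mapTrsmCase`, and its parameter names are illustrative only (inferred from the table rows, not part of Eigen):

```cpp
// Hypothetical helper: derive triSolve's template flags from a col-major TRSM case.
// leftSide/lowerA/transA mirror the L/R, L/U, N/T letters of the case names.
struct TriSolveFlags {
  bool isARowMajor, isBRowMajor, isFWDSolve;
};

constexpr TriSolveFlags mapTrsmCase(bool leftSide, bool lowerA, bool transA) {
  // Left-sided cases keep B col-major; viewing A transposed flips it to row-major, and the
  // solve runs forward exactly when the triangle and transpose flags differ. Right-sided
  // cases treat B as row-major (M and numRHS are swapped, per the Note above) and run
  // forward exactly when the triangle and transpose flags agree.
  return leftSide ? TriSolveFlags{transA, false, lowerA != transA}
                  : TriSolveFlags{!transA, true, lowerA == transA};
}

// Spot-check two rows of the table at compile time.
constexpr TriSolveFlags lln = mapTrsmCase(true, true, false);   // LLN
static_assert(!lln.isARowMajor && !lln.isBRowMajor && lln.isFWDSolve, "LLN row");
constexpr TriSolveFlags rlt = mapTrsmCase(false, true, true);   // RLT
static_assert(!rlt.isARowMajor && rlt.isBRowMajor && rlt.isFWDSolve, "RLT row");
```

+// Template specializations of trsmKernelL/R for float/double and inner strides of 1.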
+#if (EIGEN_USE_AVX512_TRSM_KERNELS) +#if (EIGEN_USE_AVX512_TRSM_R_KERNELS) +template +struct trsmKernelR; + +template +struct trsmKernelR { + static void kernel(Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, + Index otherStride); +}; + +template +struct trsmKernelR { + static void kernel(Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, + Index otherStride); +}; + +template +EIGEN_DONT_INLINE void trsmKernelR::kernel( + Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, + Index otherStride) { + EIGEN_UNUSED_VARIABLE(otherIncr); + triSolve( + const_cast(_tri), _other, size, otherSize, triStride, otherStride); +} + +template +EIGEN_DONT_INLINE void trsmKernelR::kernel( + Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, + Index otherStride) { + EIGEN_UNUSED_VARIABLE(otherIncr); + triSolve( + const_cast(_tri), _other, size, otherSize, triStride, otherStride); +} +#endif // (EIGEN_USE_AVX512_TRSM_R_KERNELS) + +// These trsm kernels require temporary memory allocation +#if (EIGEN_USE_AVX512_TRSM_L_KERNELS) +template +struct trsmKernelL; + +template +struct trsmKernelL { + static void kernel(Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, + Index otherStride); +}; + +template +struct trsmKernelL { + static void kernel(Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, + Index otherStride); +}; + +template +EIGEN_DONT_INLINE void trsmKernelL::kernel( + Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, + Index otherStride) { + EIGEN_UNUSED_VARIABLE(otherIncr); + triSolve( + const_cast(_tri), _other, size, otherSize, triStride, otherStride); +} + +template +EIGEN_DONT_INLINE void trsmKernelL::kernel( + Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, + Index otherStride) { + EIGEN_UNUSED_VARIABLE(otherIncr); + triSolve( + const_cast(_tri), _other, size, otherSize, triStride, otherStride); +} +#endif // EIGEN_USE_AVX512_TRSM_L_KERNELS +#endif // EIGEN_USE_AVX512_TRSM_KERNELS +} // namespace internal +} // namespace Eigen +#endif // EIGEN_CORE_ARCH_AVX512_TRSM_KERNEL_H diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc b/libs/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc new file mode 100644 index 0000000..6b09424 --- /dev/null +++ b/libs/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc @@ -0,0 +1,1212 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2022 Intel Corporation +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CORE_ARCH_AVX512_TRSM_UNROLLS_H +#define EIGEN_CORE_ARCH_AVX512_TRSM_UNROLLS_H + +template +static EIGEN_ALWAYS_INLINE int64_t idA(int64_t i, int64_t j, int64_t LDA) { + EIGEN_IF_CONSTEXPR(isARowMajor) return i * LDA + j; + else return i + j * LDA; +} + +/** + * This namespace contains various classes used to generate compile-time unrolls which are + * used throughout the trsm/gemm kernels. The unrolls are characterized as for-loops (1-D), nested + * for-loops (2-D), or triple nested for-loops (3-D). 
Unrolls are generated using template recursion.
+ *
+ * For example, the 2-D for-loop is unrolled recursively by first flattening it to a 1-D loop:
+ *
+ * for(startI = 0; startI < endI; startI++)        for(startC = 0; startC < endI*endJ; startC++)
+ *   for(startJ = 0; startJ < endJ; startJ++)  ---->  startI = (startC)/(endJ)
+ *     func(startI,startJ)                            startJ = (startC)%(endJ)
+ *                                                    func(...)
+ *
+ * The 1-D loop can be unrolled recursively by using enable_if and defining an auxiliary function
+ * with a template parameter used as a counter.
+ *
+ * template <endI, endJ, counter>
+ * std::enable_if_t<(counter <= 0)>  <---- tail case.
+ * aux_func {}
+ *
+ * template <endI, endJ, counter>
+ * std::enable_if_t<(counter > 0)>  <---- actual for-loop
+ * aux_func {
+ *   startC = endI*endJ - counter
+ *   startI = (startC)/(endJ)
+ *   startJ = (startC)%(endJ)
+ *   func(startI, startJ)
+ *   aux_func<endI, endJ, counter - 1>()
+ * }
+ *
+ * Note: Additional wrapper functions are provided for aux_func which hide the counter template
+ * parameter, since counter usually depends on endI, endJ, etc...
+ *
+ * Conventions:
+ * 1) endX: specifies the terminal value for the for-loop (ex: for(startX = 0; startX < endX; startX++))
+ *
+ * 2) rem, remM, remK template parameters are used for deciding whether to use masked operations for
+ *    handling remaining tails (when sizes are not multiples of PacketSize or EIGEN_AVX_MAX_NUM_ROW)
+ */
+namespace unrolls {
+
+template <int64_t N>
+EIGEN_ALWAYS_INLINE auto remMask(int64_t m) {
+  EIGEN_IF_CONSTEXPR(N == 16) { return 0xFFFF >> (16 - m); }
+  else EIGEN_IF_CONSTEXPR(N == 8) {
+    return 0xFF >> (8 - m);
+  }
+  else EIGEN_IF_CONSTEXPR(N == 4) {
+    return 0x0F >> (4 - m);
+  }
+  return 0;
+}
+
+template <typename Packet>
+EIGEN_ALWAYS_INLINE void trans8x8blocks(PacketBlock<Packet, 8> &kernel);
+
+template <>
+EIGEN_ALWAYS_INLINE void trans8x8blocks(PacketBlock<vecFullFloat, 8> &kernel) {
+  __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+  __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+  __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+  __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+  __m512 T4 = _mm512_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+  __m512 T5 = _mm512_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+  __m512 T6 = _mm512_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+  __m512 T7 = _mm512_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+
+  kernel.packet[0] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T0), _mm512_castps_pd(T2)));
+  kernel.packet[1] = _mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T0), _mm512_castps_pd(T2)));
+  kernel.packet[2] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T1), _mm512_castps_pd(T3)));
+  kernel.packet[3] = _mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T1), _mm512_castps_pd(T3)));
+  kernel.packet[4] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T4), _mm512_castps_pd(T6)));
+  kernel.packet[5] = _mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T4), _mm512_castps_pd(T6)));
+  kernel.packet[6] = _mm512_castpd_ps(_mm512_unpacklo_pd(_mm512_castps_pd(T5), _mm512_castps_pd(T7)));
+  kernel.packet[7] = _mm512_castpd_ps(_mm512_unpackhi_pd(_mm512_castps_pd(T5), _mm512_castps_pd(T7)));
+
+  T0 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[4]), 0x4E));
+  T0 = _mm512_mask_blend_ps(0xF0F0, kernel.packet[0], T0);
+  T4 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[0]), 0x4E));
+  T4 = _mm512_mask_blend_ps(0xF0F0, T4, kernel.packet[4]);
+  T1 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[5]), 0x4E));
+  T1 = _mm512_mask_blend_ps(0xF0F0, kernel.packet[1], T1);
+  T5 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[1]), 0x4E));
+  T5 = _mm512_mask_blend_ps(0xF0F0, T5, kernel.packet[5]);
+  T2 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[6]), 0x4E));
+  T2 = _mm512_mask_blend_ps(0xF0F0, kernel.packet[2], T2);
+  T6 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[2]), 0x4E));
+  T6 = _mm512_mask_blend_ps(0xF0F0, T6, kernel.packet[6]);
+  T3 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[7]), 0x4E));
+  T3 = _mm512_mask_blend_ps(0xF0F0, kernel.packet[3], T3);
+  T7 = _mm512_castpd_ps(_mm512_permutex_pd(_mm512_castps_pd(kernel.packet[3]), 0x4E));
+  T7 = _mm512_mask_blend_ps(0xF0F0, T7, kernel.packet[7]);
+
+  kernel.packet[0] = T0;
+  kernel.packet[1] = T1;
+  kernel.packet[2] = T2;
+  kernel.packet[3] = T3;
+  kernel.packet[4] = T4;
+  kernel.packet[5] = T5;
+  kernel.packet[6] = T6;
+  kernel.packet[7] = T7;
+}
+
+template <>
+EIGEN_ALWAYS_INLINE void trans8x8blocks(PacketBlock<vecFullDouble, 8> &kernel) {
+  ptranspose(kernel);
+}
+
+/***
+ * Unrolls for transposed C stores
+ */
+template <typename Scalar>
+class trans {
+ public:
+  using vec = typename std::conditional<std::is_same<Scalar, float>::value, vecFullFloat, vecFullDouble>::type;
+  using vecHalf = typename std::conditional<std::is_same<Scalar, float>::value, vecHalfFloat, vecFullDouble>::type;
+  static constexpr int64_t PacketSize = packet_traits<Scalar>::size;
+
+  /***********************************
+   * Auxiliary Functions for:
+   *  - storeC
+   ***********************************
+   */
+
+  /**
+   * aux_storeC
+   *
+   * 1-D unroll
+   *   for(startN = 0; startN < endN; startN++)
+   *
+   * (endN <= PacketSize) is required to handle the fp32 case, see comments in transStoreC
+   *
+   **/
+  template <int64_t endN, int64_t counter, int64_t unrollN, int64_t packetIndexOffset, bool remM>
+  static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0 && endN <= PacketSize)> aux_storeC(
+      Scalar *C_arr, int64_t LDC, PacketBlock<vec, EIGEN_AVX_MAX_NUM_ACC> &zmm, int64_t remM_ = 0) {
+    constexpr int64_t counterReverse = endN - counter;
+    constexpr int64_t startN = counterReverse;
+
+    EIGEN_IF_CONSTEXPR(startN < EIGEN_AVX_MAX_NUM_ROW) {
+      EIGEN_IF_CONSTEXPR(remM) {
+        pstoreu(
+            C_arr + LDC * startN,
+            padd(ploadu((const Scalar *)C_arr + LDC * startN, remMask(remM_)),
+                 preinterpret(zmm.packet[packetIndexOffset + (unrollN / PacketSize) * startN]),
+                 remMask(remM_)),
+            remMask(remM_));
+      }
+      else {
+        pstoreu(C_arr + LDC * startN,
+                padd(ploadu((const Scalar *)C_arr + LDC * startN),
+                     preinterpret(zmm.packet[packetIndexOffset + (unrollN / PacketSize) * startN])));
+      }
+    }
+    else {  // This block is only needed for the fp32 case
+      // Reinterpret as __m512 for _mm512_shuffle_f32x4
+      vecFullFloat zmm2vecFullFloat = preinterpret(
+          zmm.packet[packetIndexOffset + (unrollN / PacketSize) * (startN - EIGEN_AVX_MAX_NUM_ROW)]);
+      // Swap lower and upper half of avx register.
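+      // (For startN >= EIGEN_AVX_MAX_NUM_ROW, the fp32 data to be stored sits in the
+      // upper 256 bits of the zmm register, so it is moved into the lower half first.)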
+ zmm.packet[packetIndexOffset + (unrollN / PacketSize) * (startN - EIGEN_AVX_MAX_NUM_ROW)] = + preinterpret(_mm512_shuffle_f32x4(zmm2vecFullFloat, zmm2vecFullFloat, 0b01001110)); + + EIGEN_IF_CONSTEXPR(remM) { + pstoreu( + C_arr + LDC * startN, + padd(ploadu((const Scalar *)C_arr + LDC * startN, remMask(remM_)), + preinterpret( + zmm.packet[packetIndexOffset + (unrollN / PacketSize) * (startN - EIGEN_AVX_MAX_NUM_ROW)])), + remMask(remM_)); + } + else { + pstoreu( + C_arr + LDC * startN, + padd(ploadu((const Scalar *)C_arr + LDC * startN), + preinterpret( + zmm.packet[packetIndexOffset + (unrollN / PacketSize) * (startN - EIGEN_AVX_MAX_NUM_ROW)]))); + } + } + aux_storeC(C_arr, LDC, zmm, remM_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t 0 && endN <= PacketSize)> aux_storeC( + Scalar *C_arr, int64_t LDC, PacketBlock &zmm, int64_t remM_ = 0) { + EIGEN_UNUSED_VARIABLE(C_arr); + EIGEN_UNUSED_VARIABLE(LDC); + EIGEN_UNUSED_VARIABLE(zmm); + EIGEN_UNUSED_VARIABLE(remM_); + } + + template + static EIGEN_ALWAYS_INLINE void storeC(Scalar *C_arr, int64_t LDC, + PacketBlock &zmm, + int64_t remM_ = 0) { + aux_storeC(C_arr, LDC, zmm, remM_); + } + + /** + * Transposes LxunrollN row major block of matrices stored EIGEN_AVX_MAX_NUM_ACC zmm registers to + * "unrollN"xL ymm registers to be stored col-major into C. + * + * For 8x48, the 8x48 block (row-major) is stored in zmm as follows: + * + * row0: zmm0 zmm1 zmm2 + * row1: zmm3 zmm4 zmm5 + * . + * . + * row7: zmm21 zmm22 zmm23 + * + * For 8x32, the 8x32 block (row-major) is stored in zmm as follows: + * + * row0: zmm0 zmm1 + * row1: zmm2 zmm3 + * . + * . + * row7: zmm14 zmm15 + * + * + * In general we will have {1,2,3} groups of avx registers each of size + * EIGEN_AVX_MAX_NUM_ROW. packetIndexOffset is used to select which "block" of + * avx registers are being transposed. + */ + template + static EIGEN_ALWAYS_INLINE void transpose(PacketBlock &zmm) { + // Note: this assumes EIGEN_AVX_MAX_NUM_ROW = 8. Unrolls should be adjusted + // accordingly if EIGEN_AVX_MAX_NUM_ROW is smaller. + constexpr int64_t zmmStride = unrollN / PacketSize; + PacketBlock r; + r.packet[0] = zmm.packet[packetIndexOffset + zmmStride * 0]; + r.packet[1] = zmm.packet[packetIndexOffset + zmmStride * 1]; + r.packet[2] = zmm.packet[packetIndexOffset + zmmStride * 2]; + r.packet[3] = zmm.packet[packetIndexOffset + zmmStride * 3]; + r.packet[4] = zmm.packet[packetIndexOffset + zmmStride * 4]; + r.packet[5] = zmm.packet[packetIndexOffset + zmmStride * 5]; + r.packet[6] = zmm.packet[packetIndexOffset + zmmStride * 6]; + r.packet[7] = zmm.packet[packetIndexOffset + zmmStride * 7]; + trans8x8blocks(r); + zmm.packet[packetIndexOffset + zmmStride * 0] = r.packet[0]; + zmm.packet[packetIndexOffset + zmmStride * 1] = r.packet[1]; + zmm.packet[packetIndexOffset + zmmStride * 2] = r.packet[2]; + zmm.packet[packetIndexOffset + zmmStride * 3] = r.packet[3]; + zmm.packet[packetIndexOffset + zmmStride * 4] = r.packet[4]; + zmm.packet[packetIndexOffset + zmmStride * 5] = r.packet[5]; + zmm.packet[packetIndexOffset + zmmStride * 6] = r.packet[6]; + zmm.packet[packetIndexOffset + zmmStride * 7] = r.packet[7]; + } +}; + +/** + * Unrolls for copyBToRowMajor + * + * Idea: + * 1) Load a block of right-hand sides to registers (using loadB). + * 2) Convert the block from column-major to row-major (transposeLxL) + * 3) Store the blocks from register either to a temp array (toTemp == true), or back to B (toTemp == false). 
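+ *
+ * For example (an illustration, assuming EIGEN_AVX_MAX_NUM_ROW == 8): with double, one 8x8 sub-block of B
+ * is held in 8 packets and transposed in place; with float, the half-register (ymm) view also makes each
+ * LxL transpose an 8x8 block of floats, so a U1 = 16 column block takes two such transposes.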
+ * + * We use at most EIGEN_AVX_MAX_NUM_ACC avx registers to store the blocks of B. The remaining registers are + * used as temps for transposing. + * + * Blocks will be of size Lx{U1,U2,U3}. packetIndexOffset is used to index between these subblocks + * For fp32, PacketSize = 2*EIGEN_AVX_MAX_NUM_ROW, so we reinterpret packets as packets half the size (zmm -> ymm). + */ +template +class transB { + public: + using vec = typename std::conditional::value, vecFullFloat, vecFullDouble>::type; + using vecHalf = typename std::conditional::value, vecHalfFloat, vecFullDouble>::type; + static constexpr int64_t PacketSize = packet_traits::size; + + /*********************************** + * Auxillary Functions for: + * - loadB + * - storeB + * - loadBBlock + * - storeBBlock + *********************************** + */ + + /** + * aux_loadB + * + * 1-D unroll + * for(startN = 0; startN < endN; startN++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadB( + Scalar *B_arr, int64_t LDB, PacketBlock &ymm, + int64_t remM_ = 0) { + constexpr int64_t counterReverse = endN - counter; + constexpr int64_t startN = counterReverse; + + EIGEN_IF_CONSTEXPR(remM) { + ymm.packet[packetIndexOffset + startN] = + ploadu((const Scalar *)&B_arr[startN * LDB], remMask(remM_)); + } + else ymm.packet[packetIndexOffset + startN] = ploadu((const Scalar *)&B_arr[startN * LDB]); + + aux_loadB(B_arr, LDB, ymm, remM_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_loadB( + Scalar *B_arr, int64_t LDB, PacketBlock &ymm, + int64_t remM_ = 0) { + EIGEN_UNUSED_VARIABLE(B_arr); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(ymm); + EIGEN_UNUSED_VARIABLE(remM_); + } + + /** + * aux_storeB + * + * 1-D unroll + * for(startN = 0; startN < endN; startN++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_storeB( + Scalar *B_arr, int64_t LDB, PacketBlock &ymm, int64_t rem_ = 0) { + constexpr int64_t counterReverse = endN - counter; + constexpr int64_t startN = counterReverse; + + EIGEN_IF_CONSTEXPR(remK || remM) { + pstoreu(&B_arr[startN * LDB], ymm.packet[packetIndexOffset + startN], + remMask(rem_)); + } + else { + pstoreu(&B_arr[startN * LDB], ymm.packet[packetIndexOffset + startN]); + } + + aux_storeB(B_arr, LDB, ymm, rem_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_storeB( + Scalar *B_arr, int64_t LDB, PacketBlock &ymm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(B_arr); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(ymm); + EIGEN_UNUSED_VARIABLE(rem_); + } + + /** + * aux_loadBBlock + * + * 1-D unroll + * for(startN = 0; startN < endN; startN += EIGEN_AVX_MAX_NUM_ROW) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadBBlock( + Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, + PacketBlock &ymm, int64_t remM_ = 0) { + constexpr int64_t counterReverse = endN - counter; + constexpr int64_t startN = counterReverse; + transB::template loadB(&B_temp[startN], LDB_, ymm); + aux_loadBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_loadBBlock( + Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, + PacketBlock &ymm, int64_t remM_ = 0) { + EIGEN_UNUSED_VARIABLE(B_arr); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(B_temp); + EIGEN_UNUSED_VARIABLE(LDB_); + EIGEN_UNUSED_VARIABLE(ymm); + EIGEN_UNUSED_VARIABLE(remM_); + } + + /** + * aux_storeBBlock + * + * 
1-D unroll + * for(startN = 0; startN < endN; startN += EIGEN_AVX_MAX_NUM_ROW) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_storeBBlock( + Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, + PacketBlock &ymm, int64_t remM_ = 0) { + constexpr int64_t counterReverse = endN - counter; + constexpr int64_t startN = counterReverse; + + EIGEN_IF_CONSTEXPR(toTemp) { + transB::template storeB(&B_temp[startN], LDB_, ymm, remK_); + } + else { + transB::template storeB(&B_arr[0 + startN * LDB], LDB, + ymm, remM_); + } + aux_storeBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_storeBBlock( + Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, + PacketBlock &ymm, int64_t remM_ = 0) { + EIGEN_UNUSED_VARIABLE(B_arr); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(B_temp); + EIGEN_UNUSED_VARIABLE(LDB_); + EIGEN_UNUSED_VARIABLE(ymm); + EIGEN_UNUSED_VARIABLE(remM_); + } + + /******************************************************** + * Wrappers for aux_XXXX to hide counter parameter + ********************************************************/ + + template + static EIGEN_ALWAYS_INLINE void loadB(Scalar *B_arr, int64_t LDB, + PacketBlock &ymm, + int64_t remM_ = 0) { + aux_loadB(B_arr, LDB, ymm, remM_); + } + + template + static EIGEN_ALWAYS_INLINE void storeB(Scalar *B_arr, int64_t LDB, + PacketBlock &ymm, + int64_t rem_ = 0) { + aux_storeB(B_arr, LDB, ymm, rem_); + } + + template + static EIGEN_ALWAYS_INLINE void loadBBlock(Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, + PacketBlock &ymm, + int64_t remM_ = 0) { + EIGEN_IF_CONSTEXPR(toTemp) { transB::template loadB(&B_arr[0], LDB, ymm, remM_); } + else { + aux_loadBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + } + + template + static EIGEN_ALWAYS_INLINE void storeBBlock(Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, + PacketBlock &ymm, + int64_t remM_ = 0) { + aux_storeBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + + template + static EIGEN_ALWAYS_INLINE void transposeLxL(PacketBlock &ymm) { + // Note: this assumes EIGEN_AVX_MAX_NUM_ROW = 8. Unrolls should be adjusted + // accordingly if EIGEN_AVX_MAX_NUM_ROW is smaller. 
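+    // The copy/ptranspose/copy sequence below is an in-register 8x8 transpose of one
+    // LxL tile of vecHalf packets; the result lands back in the same packet slots.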
+ PacketBlock r; + r.packet[0] = ymm.packet[packetIndexOffset + 0]; + r.packet[1] = ymm.packet[packetIndexOffset + 1]; + r.packet[2] = ymm.packet[packetIndexOffset + 2]; + r.packet[3] = ymm.packet[packetIndexOffset + 3]; + r.packet[4] = ymm.packet[packetIndexOffset + 4]; + r.packet[5] = ymm.packet[packetIndexOffset + 5]; + r.packet[6] = ymm.packet[packetIndexOffset + 6]; + r.packet[7] = ymm.packet[packetIndexOffset + 7]; + ptranspose(r); + ymm.packet[packetIndexOffset + 0] = r.packet[0]; + ymm.packet[packetIndexOffset + 1] = r.packet[1]; + ymm.packet[packetIndexOffset + 2] = r.packet[2]; + ymm.packet[packetIndexOffset + 3] = r.packet[3]; + ymm.packet[packetIndexOffset + 4] = r.packet[4]; + ymm.packet[packetIndexOffset + 5] = r.packet[5]; + ymm.packet[packetIndexOffset + 6] = r.packet[6]; + ymm.packet[packetIndexOffset + 7] = r.packet[7]; + } + + template + static EIGEN_ALWAYS_INLINE void transB_kernel(Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, + PacketBlock &ymm, + int64_t remM_ = 0) { + constexpr int64_t U3 = PacketSize * 3; + constexpr int64_t U2 = PacketSize * 2; + constexpr int64_t U1 = PacketSize * 1; + /** + * Unrolls needed for each case: + * - AVX512 fp32 48 32 16 8 4 2 1 + * - AVX512 fp64 24 16 8 4 2 1 + * + * For fp32 L and U1 are 1:2 so for U3/U2 cases the loads/stores need to be split up. + */ + EIGEN_IF_CONSTEXPR(unrollN == U3) { + // load LxU3 B col major, transpose LxU3 row major + constexpr int64_t maxUBlock = std::min(3 * EIGEN_AVX_MAX_NUM_ROW, U3); + transB::template loadBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + transB::template transposeLxL<0 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template transposeLxL<1 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template transposeLxL<2 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template storeBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + + EIGEN_IF_CONSTEXPR(maxUBlock < U3) { + transB::template loadBBlock(&B_arr[maxUBlock * LDB], LDB, &B_temp[maxUBlock], LDB_, + ymm, remM_); + transB::template transposeLxL<0 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template transposeLxL<1 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template transposeLxL<2 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template storeBBlock(&B_arr[maxUBlock * LDB], LDB, &B_temp[maxUBlock], LDB_, + ymm, remM_); + } + } + else EIGEN_IF_CONSTEXPR(unrollN == U2) { + // load LxU2 B col major, transpose LxU2 row major + constexpr int64_t maxUBlock = std::min(3 * EIGEN_AVX_MAX_NUM_ROW, U2); + transB::template loadBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + transB::template transposeLxL<0 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template transposeLxL<1 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + EIGEN_IF_CONSTEXPR(maxUBlock < U2) transB::template transposeLxL<2 * EIGEN_AVX_MAX_NUM_ROW>(ymm); + transB::template storeBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + + EIGEN_IF_CONSTEXPR(maxUBlock < U2) { + transB::template loadBBlock(&B_arr[maxUBlock * LDB], LDB, + &B_temp[maxUBlock], LDB_, ymm, remM_); + transB::template transposeLxL<0>(ymm); + transB::template storeBBlock(&B_arr[maxUBlock * LDB], LDB, + &B_temp[maxUBlock], LDB_, ymm, remM_); + } + } + else EIGEN_IF_CONSTEXPR(unrollN == U1) { + // load LxU1 B col major, transpose LxU1 row major + transB::template loadBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + transB::template transposeLxL<0>(ymm); + EIGEN_IF_CONSTEXPR(EIGEN_AVX_MAX_NUM_ROW < U1) { transB::template transposeLxL<1 * EIGEN_AVX_MAX_NUM_ROW>(ymm); } + transB::template storeBBlock(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + else EIGEN_IF_CONSTEXPR(unrollN == 8 
&& U1 > 8) { + // load Lx4 B col major, transpose Lx4 row major + transB::template loadBBlock<8, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + transB::template transposeLxL<0>(ymm); + transB::template storeBBlock<8, toTemp, remM, 8>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + else EIGEN_IF_CONSTEXPR(unrollN == 4 && U1 > 4) { + // load Lx4 B col major, transpose Lx4 row major + transB::template loadBBlock<4, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + transB::template transposeLxL<0>(ymm); + transB::template storeBBlock<4, toTemp, remM, 4>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + else EIGEN_IF_CONSTEXPR(unrollN == 2) { + // load Lx2 B col major, transpose Lx2 row major + transB::template loadBBlock<2, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + transB::template transposeLxL<0>(ymm); + transB::template storeBBlock<2, toTemp, remM, 2>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + else EIGEN_IF_CONSTEXPR(unrollN == 1) { + // load Lx1 B col major, transpose Lx1 row major + transB::template loadBBlock<1, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + transB::template transposeLxL<0>(ymm); + transB::template storeBBlock<1, toTemp, remM, 1>(B_arr, LDB, B_temp, LDB_, ymm, remM_); + } + } +}; + +/** + * Unrolls for triSolveKernel + * + * Idea: + * 1) Load a block of right-hand sides to registers in RHSInPacket (using loadRHS). + * 2) Do triangular solve with RHSInPacket and a small block of A (triangular matrix) + * stored in AInPacket (using triSolveMicroKernel). + * 3) Store final results (in avx registers) back into memory (using storeRHS). + * + * RHSInPacket uses at most EIGEN_AVX_MAX_NUM_ACC avx registers and AInPacket uses at most + * EIGEN_AVX_MAX_NUM_ROW registers. + */ +template +class trsm { + public: + using vec = typename std::conditional::value, vecFullFloat, vecFullDouble>::type; + static constexpr int64_t PacketSize = packet_traits::size; + + /*********************************** + * Auxillary Functions for: + * - loadRHS + * - storeRHS + * - divRHSByDiag + * - updateRHS + * - triSolveMicroKernel + ************************************/ + /** + * aux_loadRHS + * + * 2-D unroll + * for(startM = 0; startM < endM; startM++) + * for(startK = 0; startK < endK; startK++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadRHS( + Scalar *B_arr, int64_t LDB, PacketBlock &RHSInPacket, int64_t rem = 0) { + constexpr int64_t counterReverse = endM * endK - counter; + constexpr int64_t startM = counterReverse / (endK); + constexpr int64_t startK = counterReverse % endK; + + constexpr int64_t packetIndex = startM * endK + startK; + constexpr int64_t startM_ = isFWDSolve ? 
startM : -startM; + const int64_t rhsIndex = (startK * PacketSize) + startM_ * LDB; + EIGEN_IF_CONSTEXPR(krem) { + RHSInPacket.packet[packetIndex] = ploadu(&B_arr[rhsIndex], remMask(rem)); + } + else { + RHSInPacket.packet[packetIndex] = ploadu(&B_arr[rhsIndex]); + } + aux_loadRHS(B_arr, LDB, RHSInPacket, rem); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_loadRHS( + Scalar *B_arr, int64_t LDB, PacketBlock &RHSInPacket, int64_t rem = 0) { + EIGEN_UNUSED_VARIABLE(B_arr); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(RHSInPacket); + EIGEN_UNUSED_VARIABLE(rem); + } + + /** + * aux_storeRHS + * + * 2-D unroll + * for(startM = 0; startM < endM; startM++) + * for(startK = 0; startK < endK; startK++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_storeRHS( + Scalar *B_arr, int64_t LDB, PacketBlock &RHSInPacket, int64_t rem = 0) { + constexpr int64_t counterReverse = endM * endK - counter; + constexpr int64_t startM = counterReverse / (endK); + constexpr int64_t startK = counterReverse % endK; + + constexpr int64_t packetIndex = startM * endK + startK; + constexpr int64_t startM_ = isFWDSolve ? startM : -startM; + const int64_t rhsIndex = (startK * PacketSize) + startM_ * LDB; + EIGEN_IF_CONSTEXPR(krem) { + pstoreu(&B_arr[rhsIndex], RHSInPacket.packet[packetIndex], remMask(rem)); + } + else { + pstoreu(&B_arr[rhsIndex], RHSInPacket.packet[packetIndex]); + } + aux_storeRHS(B_arr, LDB, RHSInPacket, rem); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_storeRHS( + Scalar *B_arr, int64_t LDB, PacketBlock &RHSInPacket, int64_t rem = 0) { + EIGEN_UNUSED_VARIABLE(B_arr); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(RHSInPacket); + EIGEN_UNUSED_VARIABLE(rem); + } + + /** + * aux_divRHSByDiag + * + * currM may be -1, (currM >=0) in enable_if checks for this + * + * 1-D unroll + * for(startK = 0; startK < endK; startK++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0 && currM >= 0)> aux_divRHSByDiag( + PacketBlock &RHSInPacket, PacketBlock &AInPacket) { + constexpr int64_t counterReverse = endK - counter; + constexpr int64_t startK = counterReverse; + + constexpr int64_t packetIndex = currM * endK + startK; + RHSInPacket.packet[packetIndex] = pmul(AInPacket.packet[currM], RHSInPacket.packet[packetIndex]); + aux_divRHSByDiag(RHSInPacket, AInPacket); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t 0 && currM >= 0)> aux_divRHSByDiag( + PacketBlock &RHSInPacket, PacketBlock &AInPacket) { + EIGEN_UNUSED_VARIABLE(RHSInPacket); + EIGEN_UNUSED_VARIABLE(AInPacket); + } + + /** + * aux_updateRHS + * + * 2-D unroll + * for(startM = initM; startM < endM; startM++) + * for(startK = 0; startK < endK; startK++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_updateRHS( + Scalar *A_arr, int64_t LDA, PacketBlock &RHSInPacket, + PacketBlock &AInPacket) { + constexpr int64_t counterReverse = (endM - initM) * endK - counter; + constexpr int64_t startM = initM + counterReverse / (endK); + constexpr int64_t startK = counterReverse % endK; + + // For each row of A, first update all corresponding RHS + constexpr int64_t packetIndex = startM * endK + startK; + EIGEN_IF_CONSTEXPR(currentM > 0) { + RHSInPacket.packet[packetIndex] = + pnmadd(AInPacket.packet[startM], RHSInPacket.packet[(currentM - 1) * endK + startK], + RHSInPacket.packet[packetIndex]); + } + + EIGEN_IF_CONSTEXPR(startK == endK - 1) { + // Once all RHS for previous row 
of A are updated, we broadcast the next element in the column A_{i, currentM}.
+      EIGEN_IF_CONSTEXPR(startM == currentM && !isUnitDiag) {
+        // If the diagonal is not unit, we broadcast the reciprocal of the diagonal into AInPacket.packet[currentM].
+        // This will be used in divRHSByDiag
+        EIGEN_IF_CONSTEXPR(isFWDSolve)
+        AInPacket.packet[currentM] = pset1(Scalar(1) / A_arr[idA(currentM, currentM, LDA)]);
+        else AInPacket.packet[currentM] = pset1(Scalar(1) / A_arr[idA(-currentM, -currentM, LDA)]);
+      }
+      else {
+        // Broadcast the next off-diagonal element of A
+        EIGEN_IF_CONSTEXPR(isFWDSolve)
+        AInPacket.packet[startM] = pset1(A_arr[idA(startM, currentM, LDA)]);
+        else AInPacket.packet[startM] = pset1(A_arr[idA(-startM, -currentM, LDA)]);
+      }
+    }
+
+    aux_updateRHS(
+        A_arr, LDA, RHSInPacket, AInPacket);
+  }
+
+  template
+  static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_updateRHS(
+      Scalar *A_arr, int64_t LDA, PacketBlock &RHSInPacket,
+      PacketBlock &AInPacket) {
+    EIGEN_UNUSED_VARIABLE(A_arr);
+    EIGEN_UNUSED_VARIABLE(LDA);
+    EIGEN_UNUSED_VARIABLE(RHSInPacket);
+    EIGEN_UNUSED_VARIABLE(AInPacket);
+  }
+
+  /**
+   * aux_triSolveMicroKernel
+   *
+   * 1-D unroll
+   *   for(startM = 0; startM < endM; startM++)
+   **/
+  template
+  static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_triSolveMicroKernel(
+      Scalar *A_arr, int64_t LDA, PacketBlock &RHSInPacket,
+      PacketBlock &AInPacket) {
+    constexpr int64_t counterReverse = endM - counter;
+    constexpr int64_t startM = counterReverse;
+
+    constexpr int64_t currentM = startM;
+    // Divide the right-hand sides in row startM by the diagonal value of A
+    // broadcast to AInPacket.packet[startM-1] in the previous iteration.
+    //
+    // Without "if constexpr" the compiler instantiates the case <-1, numK>;
+    // this is handled with enable_if to prevent out-of-bounds warnings
+    // from the compiler.
+    EIGEN_IF_CONSTEXPR(!isUnitDiag && startM > 0)
+    trsm::template divRHSByDiag(RHSInPacket, AInPacket);
+
+    // After division, the rhs corresponding to subsequent rows of A can be partially updated.
+    // We also broadcast the reciprocal of the next diagonal to AInPacket.packet[currentM] (if needed)
+    // to be used in the next iteration.
+    trsm::template updateRHS(A_arr, LDA, RHSInPacket,
+                             AInPacket);
+
+    // Handle division for the RHS corresponding to the final row of A.
+    EIGEN_IF_CONSTEXPR(!isUnitDiag && startM == endM - 1)
+    trsm::template divRHSByDiag(RHSInPacket, AInPacket);
+
+    aux_triSolveMicroKernel(A_arr, LDA, RHSInPacket,
+                            AInPacket);
+  }
+
+  template
+  static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_triSolveMicroKernel(
+      Scalar *A_arr, int64_t LDA, PacketBlock &RHSInPacket,
+      PacketBlock &AInPacket) {
+    EIGEN_UNUSED_VARIABLE(A_arr);
+    EIGEN_UNUSED_VARIABLE(LDA);
+    EIGEN_UNUSED_VARIABLE(RHSInPacket);
+    EIGEN_UNUSED_VARIABLE(AInPacket);
+  }
+
+  /********************************************************
+   * Wrappers for aux_XXXX to hide counter parameter
+   ********************************************************/
+
+  /**
+   * Load an endMxendK block of B to RHSInPacket
+   * Masked loads are used for cases where endK is not a multiple of PacketSize
+   */
+  template
+  static EIGEN_ALWAYS_INLINE void loadRHS(Scalar *B_arr, int64_t LDB,
+                                          PacketBlock &RHSInPacket, int64_t rem = 0) {
+    aux_loadRHS(B_arr, LDB, RHSInPacket, rem);
+  }
+
+  /**
+   * Store an endMxendK block of B from RHSInPacket
+   * Masked stores are used for cases where endK is not a multiple of PacketSize
+   */
+  template
+  static EIGEN_ALWAYS_INLINE void storeRHS(Scalar *B_arr, int64_t LDB,
+                                           PacketBlock &RHSInPacket, int64_t rem = 0) {
+    aux_storeRHS(B_arr, LDB, RHSInPacket, rem);
+  }
+
+  /**
+   * Only used if the triangular matrix has non-unit diagonal values
+   */
+  template
+  static EIGEN_ALWAYS_INLINE void divRHSByDiag(PacketBlock &RHSInPacket,
+                                               PacketBlock &AInPacket) {
+    aux_divRHSByDiag(RHSInPacket, AInPacket);
+  }
+
+  /**
+   * Update the right-hand sides (stored in avx registers), traversing along the column A_{i,currentM},
+   * where currentM <= i <= endM, and broadcasting each value to AInPacket.
+   **/
+  template
+  static EIGEN_ALWAYS_INLINE void updateRHS(Scalar *A_arr, int64_t LDA,
+                                            PacketBlock &RHSInPacket,
+                                            PacketBlock &AInPacket) {
+    aux_updateRHS(
+        A_arr, LDA, RHSInPacket, AInPacket);
+  }
+
+  /**
+   * endM: dimension of A. 1 <= endM <= EIGEN_AVX_MAX_NUM_ROW
+   * numK: number of avx registers to use for each row of B (ex fp32: 48 rhs => 3 avx reg used). 1 <= numK <= 3.
+   * isFWDSolve: true => forward substitution, false => backward substitution
+   * isUnitDiag: true => triangular matrix has a unit diagonal.
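+   *
+   * For example (illustration): in fp32 with 48 right-hand sides, numK = 3 packets of 16 floats hold one
+   * row of B, so an 8x48 block occupies 8*3 = 24 registers, i.e. the EIGEN_AVX_MAX_NUM_ACC budget noted
+   * in the class comment above.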
+ */ + template + static EIGEN_ALWAYS_INLINE void triSolveMicroKernel(Scalar *A_arr, int64_t LDA, + PacketBlock &RHSInPacket, + PacketBlock &AInPacket) { + static_assert(numK >= 1 && numK <= 3, "numK out of range"); + aux_triSolveMicroKernel(A_arr, LDA, RHSInPacket, AInPacket); + } +}; + +/** + * Unrolls for gemm kernel + * + * isAdd: true => C += A*B, false => C -= A*B + */ +template +class gemm { + public: + using vec = typename std::conditional::value, vecFullFloat, vecFullDouble>::type; + static constexpr int64_t PacketSize = packet_traits::size; + + /*********************************** + * Auxillary Functions for: + * - setzero + * - updateC + * - storeC + * - startLoadB + * - triSolveMicroKernel + ************************************/ + + /** + * aux_setzero + * + * 2-D unroll + * for(startM = 0; startM < endM; startM++) + * for(startN = 0; startN < endN; startN++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_setzero( + PacketBlock &zmm) { + constexpr int64_t counterReverse = endM * endN - counter; + constexpr int64_t startM = counterReverse / (endN); + constexpr int64_t startN = counterReverse % endN; + + zmm.packet[startN * endM + startM] = pzero(zmm.packet[startN * endM + startM]); + aux_setzero(zmm); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_setzero( + PacketBlock &zmm) { + EIGEN_UNUSED_VARIABLE(zmm); + } + + /** + * aux_updateC + * + * 2-D unroll + * for(startM = 0; startM < endM; startM++) + * for(startN = 0; startN < endN; startN++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_updateC( + Scalar *C_arr, int64_t LDC, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(rem_); + constexpr int64_t counterReverse = endM * endN - counter; + constexpr int64_t startM = counterReverse / (endN); + constexpr int64_t startN = counterReverse % endN; + + EIGEN_IF_CONSTEXPR(rem) + zmm.packet[startN * endM + startM] = + padd(ploadu(&C_arr[(startN)*LDC + startM * PacketSize], remMask(rem_)), + zmm.packet[startN * endM + startM], remMask(rem_)); + else zmm.packet[startN * endM + startM] = + padd(ploadu(&C_arr[(startN)*LDC + startM * PacketSize]), zmm.packet[startN * endM + startM]); + aux_updateC(C_arr, LDC, zmm, rem_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_updateC( + Scalar *C_arr, int64_t LDC, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(C_arr); + EIGEN_UNUSED_VARIABLE(LDC); + EIGEN_UNUSED_VARIABLE(zmm); + EIGEN_UNUSED_VARIABLE(rem_); + } + + /** + * aux_storeC + * + * 2-D unroll + * for(startM = 0; startM < endM; startM++) + * for(startN = 0; startN < endN; startN++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_storeC( + Scalar *C_arr, int64_t LDC, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(rem_); + constexpr int64_t counterReverse = endM * endN - counter; + constexpr int64_t startM = counterReverse / (endN); + constexpr int64_t startN = counterReverse % endN; + + EIGEN_IF_CONSTEXPR(rem) + pstoreu(&C_arr[(startN)*LDC + startM * PacketSize], zmm.packet[startN * endM + startM], + remMask(rem_)); + else pstoreu(&C_arr[(startN)*LDC + startM * PacketSize], zmm.packet[startN * endM + startM]); + aux_storeC(C_arr, LDC, zmm, rem_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_storeC( + Scalar *C_arr, int64_t LDC, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(C_arr); + EIGEN_UNUSED_VARIABLE(LDC); + 
EIGEN_UNUSED_VARIABLE(zmm); + EIGEN_UNUSED_VARIABLE(rem_); + } + + /** + * aux_startLoadB + * + * 1-D unroll + * for(startL = 0; startL < endL; startL++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_startLoadB( + Scalar *B_t, int64_t LDB, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(rem_); + constexpr int64_t counterReverse = endL - counter; + constexpr int64_t startL = counterReverse; + + EIGEN_IF_CONSTEXPR(rem) + zmm.packet[unrollM * unrollN + startL] = + ploadu(&B_t[(startL / unrollM) * LDB + (startL % unrollM) * PacketSize], remMask(rem_)); + else zmm.packet[unrollM * unrollN + startL] = + ploadu(&B_t[(startL / unrollM) * LDB + (startL % unrollM) * PacketSize]); + + aux_startLoadB(B_t, LDB, zmm, rem_); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_startLoadB( + Scalar *B_t, int64_t LDB, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(B_t); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(zmm); + EIGEN_UNUSED_VARIABLE(rem_); + } + + /** + * aux_startBCastA + * + * 1-D unroll + * for(startB = 0; startB < endB; startB++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_startBCastA( + Scalar *A_t, int64_t LDA, PacketBlock &zmm) { + constexpr int64_t counterReverse = endB - counter; + constexpr int64_t startB = counterReverse; + + zmm.packet[unrollM * unrollN + numLoad + startB] = pload1(&A_t[idA(startB, 0, LDA)]); + + aux_startBCastA(A_t, LDA, zmm); + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_startBCastA( + Scalar *A_t, int64_t LDA, PacketBlock &zmm) { + EIGEN_UNUSED_VARIABLE(A_t); + EIGEN_UNUSED_VARIABLE(LDA); + EIGEN_UNUSED_VARIABLE(zmm); + } + + /** + * aux_loadB + * currK: current K + * + * 1-D unroll + * for(startM = 0; startM < endM; startM++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadB( + Scalar *B_t, int64_t LDB, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(rem_); + if ((numLoad / endM + currK < unrollK)) { + constexpr int64_t counterReverse = endM - counter; + constexpr int64_t startM = counterReverse; + + EIGEN_IF_CONSTEXPR(rem) { + zmm.packet[endM * unrollN + (startM + currK * endM) % numLoad] = + ploadu(&B_t[(numLoad / endM + currK) * LDB + startM * PacketSize], remMask(rem_)); + } + else { + zmm.packet[endM * unrollN + (startM + currK * endM) % numLoad] = + ploadu(&B_t[(numLoad / endM + currK) * LDB + startM * PacketSize]); + } + + aux_loadB(B_t, LDB, zmm, rem_); + } + } + + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_loadB( + Scalar *B_t, int64_t LDB, PacketBlock &zmm, int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(B_t); + EIGEN_UNUSED_VARIABLE(LDB); + EIGEN_UNUSED_VARIABLE(zmm); + EIGEN_UNUSED_VARIABLE(rem_); + } + + /** + * aux_microKernel + * + * 3-D unroll + * for(startM = 0; startM < endM; startM++) + * for(startN = 0; startN < endN; startN++) + * for(startK = 0; startK < endK; startK++) + **/ + template + static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_microKernel( + Scalar *B_t, Scalar *A_t, int64_t LDB, int64_t LDA, PacketBlock &zmm, + int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(rem_); + constexpr int64_t counterReverse = endM * endN * endK - counter; + constexpr int startK = counterReverse / (endM * endN); + constexpr int startN = (counterReverse / (endM)) % endN; + constexpr int startM = counterReverse % endM; + + EIGEN_IF_CONSTEXPR(startK == 0 && startM == 0 && startN == 0) { + 
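      // First flattened iteration only (startK == startM == startN == 0): preload the
+      // initial numLoad packets of B and broadcast the first numBCast elements of A.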
+      gemm::template startLoadB(B_t, LDB, zmm, rem_);
+      gemm::template startBCastA(A_t, LDA, zmm);
+    }
+
+    {
+      // Interleave FMA and Bcast
+      EIGEN_IF_CONSTEXPR(isAdd) {
+        zmm.packet[startN * endM + startM] =
+            pmadd(zmm.packet[endM * endN + numLoad + (startN + startK * endN) % numBCast],
+                  zmm.packet[endM * endN + (startM + startK * endM) % numLoad], zmm.packet[startN * endM + startM]);
+      }
+      else {
+        zmm.packet[startN * endM + startM] =
+            pnmadd(zmm.packet[endM * endN + numLoad + (startN + startK * endN) % numBCast],
+                   zmm.packet[endM * endN + (startM + startK * endM) % numLoad], zmm.packet[startN * endM + startM]);
+      }
+      // Bcast
+      EIGEN_IF_CONSTEXPR(startM == endM - 1 && (numBCast + startN + startK * endN < endK * endN)) {
+        zmm.packet[endM * endN + numLoad + (startN + startK * endN) % numBCast] = pload1(&A_t[idA(
+            (numBCast + startN + startK * endN) % endN, (numBCast + startN + startK * endN) / endN, LDA)]);
+      }
+    }
+
+    // We have updated all accumulators, time to load the next set of B's
+    EIGEN_IF_CONSTEXPR((startN == endN - 1) && (startM == endM - 1)) {
+      gemm::template loadB(B_t, LDB, zmm, rem_);
+    }
+    aux_microKernel(B_t, A_t, LDB, LDA, zmm, rem_);
+  }
+
+  template
+  static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_microKernel(
+      Scalar *B_t, Scalar *A_t, int64_t LDB, int64_t LDA, PacketBlock &zmm,
+      int64_t rem_ = 0) {
+    EIGEN_UNUSED_VARIABLE(B_t);
+    EIGEN_UNUSED_VARIABLE(A_t);
+    EIGEN_UNUSED_VARIABLE(LDB);
+    EIGEN_UNUSED_VARIABLE(LDA);
+    EIGEN_UNUSED_VARIABLE(zmm);
+    EIGEN_UNUSED_VARIABLE(rem_);
+  }
+
+  /********************************************************
+   * Wrappers for aux_XXXX to hide counter parameter
+   ********************************************************/
+
+  template
+  static EIGEN_ALWAYS_INLINE void setzero(PacketBlock &zmm) {
+    aux_setzero(zmm);
+  }
+
+  /**
+   * Ideally the compiler folds these into vaddp{s,d} with an embedded memory load.
+   */
+  template
+  static EIGEN_ALWAYS_INLINE void updateC(Scalar *C_arr, int64_t LDC,
+                                          PacketBlock &zmm,
+                                          int64_t rem_ = 0) {
+    EIGEN_UNUSED_VARIABLE(rem_);
+    aux_updateC(C_arr, LDC, zmm, rem_);
+  }
+
+  template
+  static EIGEN_ALWAYS_INLINE void storeC(Scalar *C_arr, int64_t LDC,
+                                         PacketBlock &zmm,
+                                         int64_t rem_ = 0) {
+    EIGEN_UNUSED_VARIABLE(rem_);
+    aux_storeC(C_arr, LDC, zmm, rem_);
+  }
+
+  /**
+   * Use numLoad registers for loading B at the start of microKernel
+   */
+  template
+  static EIGEN_ALWAYS_INLINE void startLoadB(Scalar *B_t, int64_t LDB,
+                                             PacketBlock &zmm,
+                                             int64_t rem_ = 0) {
+    EIGEN_UNUSED_VARIABLE(rem_);
+    aux_startLoadB(B_t, LDB, zmm, rem_);
+  }
+
+  /**
+   * Use numBCast registers for broadcasting A at the start of microKernel
+   */
+  template
+  static EIGEN_ALWAYS_INLINE void startBCastA(Scalar *A_t, int64_t LDA,
+                                              PacketBlock &zmm) {
+    aux_startBCastA(A_t, LDA, zmm);
+  }
+
+  /**
+   * Loads the next set of B into vector registers between each K unroll.
+   */
+  template
+  static EIGEN_ALWAYS_INLINE void loadB(Scalar *B_t, int64_t LDB,
+                                        PacketBlock &zmm,
+                                        int64_t rem_ = 0) {
+    EIGEN_UNUSED_VARIABLE(rem_);
+    aux_loadB(B_t, LDB, zmm, rem_);
+  }
+
+  /**
+   * Generates a microkernel for gemm (row-major) with unrolls {1,2,4,8}x{U1,U2,U3} to compute C -= A*B.
+   * The A matrix can be row- or col-major. The B matrix is assumed row-major.
+   *
+   * isARowMajor: is A row-major
+   * endM: Number of registers per row
+   * endN: Number of rows
+   * endK: Loop unroll for K.
+   * numLoad: Number of registers for loading B.
+   * numBCast: Number of registers for broadcasting A.
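+   *
+   * As a register-budget illustration (fp32, 16-float packets): the 8x48 case below uses endM = 3 and
+   * endN = 8, so 24 accumulators + 6 B-load registers + 2 A-broadcast registers = 32, exactly the
+   * number of zmm registers available.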
+ * + * Ex: microkernel: 8x48 unroll (24 accumulators), k unrolled 4 times, + * 6 register for loading B, 2 for broadcasting A. + * + * Note: Ideally the microkernel should not have any register spilling. + * The avx instruction counts should be: + * - endK*endN vbroadcasts{s,d} + * - endK*endM vmovup{s,d} + * - endK*endN*endM FMAs + * + * From testing, there are no register spills with clang. There are register spills with GNU, which + * causes a performance hit. + */ + template + static EIGEN_ALWAYS_INLINE void microKernel(Scalar *B_t, Scalar *A_t, int64_t LDB, int64_t LDA, + PacketBlock &zmm, + int64_t rem_ = 0) { + EIGEN_UNUSED_VARIABLE(rem_); + aux_microKernel(B_t, A_t, LDB, LDA, zmm, + rem_); + } +}; +} // namespace unrolls + +#endif // EIGEN_CORE_ARCH_AVX512_TRSM_UNROLLS_H diff --git a/libs/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h index 3304127..62a7429 100644 --- a/libs/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TYPE_CASTING_AVX512_H #define EIGEN_TYPE_CASTING_AVX512_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -30,6 +32,56 @@ template<> EIGEN_STRONG_INLINE Packet16f preinterpret(cons return _mm512_castsi512_ps(a); } +template<> EIGEN_STRONG_INLINE Packet8d preinterpret(const Packet16f& a) { + return _mm512_castps_pd(a); +} + +template<> EIGEN_STRONG_INLINE Packet16f preinterpret(const Packet8d& a) { + return _mm512_castpd_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f preinterpret(const Packet16f& a) { + return _mm512_castps512_ps256(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f preinterpret(const Packet16f& a) { + return _mm512_castps512_ps128(a); +} + +template<> EIGEN_STRONG_INLINE Packet4d preinterpret(const Packet8d& a) { + return _mm512_castpd512_pd256(a); +} + +template<> EIGEN_STRONG_INLINE Packet2d preinterpret(const Packet8d& a) { + return _mm512_castpd512_pd128(a); +} + +template<> EIGEN_STRONG_INLINE Packet16f preinterpret(const Packet8f& a) { + return _mm512_castps256_ps512(a); +} + +template<> EIGEN_STRONG_INLINE Packet16f preinterpret(const Packet4f& a) { + return _mm512_castps128_ps512(a); +} + +template<> EIGEN_STRONG_INLINE Packet8d preinterpret(const Packet4d& a) { + return _mm512_castpd256_pd512(a); +} + +template<> EIGEN_STRONG_INLINE Packet8d preinterpret(const Packet2d& a) { + return _mm512_castpd128_pd512(a); +} + +template<> EIGEN_STRONG_INLINE Packet16f preinterpret(const Packet16f& a) { + return a; +} + +template<> EIGEN_STRONG_INLINE Packet8d preinterpret(const Packet8d& a) { + return a; +} + +#ifndef EIGEN_VECTORIZE_AVX512FP16 + template <> struct type_casting_traits { enum { @@ -56,6 +108,8 @@ template<> EIGEN_STRONG_INLINE Packet16h pcast(const Packe return float2half(a); } +#endif + template <> struct type_casting_traits { enum { @@ -82,6 +136,77 @@ template<> EIGEN_STRONG_INLINE Packet16bf pcast(const Pac return F32ToBf16(a); } +#ifdef EIGEN_VECTORIZE_AVX512FP16 + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 2 + }; +}; + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 2, + TgtCoeffRatio = 1 + }; +}; + +template <> +EIGEN_STRONG_INLINE Packet16f pcast(const Packet32h& a) { + // Discard second-half of input. 
+ Packet16h low = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(a), 0)); + return _mm512_cvtxph_ps(_mm256_castsi256_ph(low)); +} + + +template <> +EIGEN_STRONG_INLINE Packet32h pcast(const Packet16f& a, const Packet16f& b) { + __m512d result = _mm512_undefined_pd(); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(_mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 0); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(_mm512_cvtps_ph(b, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 1); + return _mm512_castpd_ph(result); +} + +template <> +EIGEN_STRONG_INLINE Packet8f pcast(const Packet16h& a) { + // Discard second-half of input. + Packet8h low = _mm_castps_si128(_mm256_extractf32x4_ps(_mm256_castsi256_ps(a), 0)); + return _mm256_cvtxph_ps(_mm_castsi128_ph(low)); +} + + +template <> +EIGEN_STRONG_INLINE Packet16h pcast(const Packet8f& a, const Packet8f& b) { + __m256d result = _mm256_undefined_pd(); + result = _mm256_insertf64x2(result, _mm_castsi128_pd(_mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 0); + result = _mm256_insertf64x2(result, _mm_castsi128_pd(_mm256_cvtps_ph(b, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 1); + return _mm256_castpd_si256(result); +} + +template <> +EIGEN_STRONG_INLINE Packet4f pcast(const Packet8h& a) { + Packet8f full = _mm256_cvtxph_ps(_mm_castsi128_ph(a)); + // Discard second-half of input. + return _mm256_extractf32x4_ps(full, 0); +} + + +template <> +EIGEN_STRONG_INLINE Packet8h pcast(const Packet4f& a, const Packet4f& b) { + __m256 result = _mm256_undefined_ps(); + result = _mm256_insertf128_ps(result, a, 0); + result = _mm256_insertf128_ps(result, b, 1); + return _mm256_cvtps_ph(result, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC); +} + + +#endif + } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/Complex.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/Complex.h index f424f11..46812f9 100644 --- a/libs/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/Complex.h @@ -11,6 +11,8 @@ #ifndef EIGEN_COMPLEX32_ALTIVEC_H #define EIGEN_COMPLEX32_ALTIVEC_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -100,6 +102,7 @@ template<> struct packet_traits > : default_packet_traits HasAbs2 = 0, HasMin = 0, HasMax = 0, + HasSqrt = 1, #ifdef __VSX__ HasBlend = 1, #endif @@ -112,53 +115,99 @@ template<> struct unpacket_traits { typedef std::complex type; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { Packet2cf res; +#ifdef __VSX__ + // Load a single std::complex from memory and duplicate + // + // Using pload would read past the end of the reference in this case + // Using vec_xl_len + vec_splat, generates poor assembly + __asm__ ("lxvdsx %x0,%y1" : "=wa" (res.v) : "Z" (from)); +#else if((std::ptrdiff_t(&from) % 16) == 0) res.v = pload((const float *)&from); else res.v = ploadu((const float *)&from); res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI); +#endif return res; } template<> EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { return Packet2cf(pload((const float *) from)); } template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { return Packet2cf(ploadu((const float*) from)); } +template<> EIGEN_ALWAYS_INLINE Packet2cf pload_partial(const std::complex* from, const Index n, const Index offset) +{ + return Packet2cf(pload_partial((const float *) from, n * 2, offset * 2)); +} +template<> EIGEN_ALWAYS_INLINE Packet2cf 
ploadu_partial(const std::complex* from, const Index n) +{ + return Packet2cf(ploadu_partial((const float*) from, n * 2)); +} template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { pstoreu((float*)to, from.v); } +template<> EIGEN_ALWAYS_INLINE void pstore_partial >(std::complex * to, const Packet2cf& from, const Index n, const Index offset) { pstore_partial((float*)to, from.v, n * 2, offset * 2); } +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial >(std::complex * to, const Packet2cf& from, const Index n) { pstoreu_partial((float*)to, from.v, n * 2); } -EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex* from0, const std::complex* from1) +EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex& from0, const std::complex& from1) { Packet4f res0, res1; #ifdef __VSX__ - __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0)); - __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1)); + // Load two std::complex from memory and combine + __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (from0)); + __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (from1)); #ifdef _BIG_ENDIAN __asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1)); #else __asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1)); #endif #else - *reinterpret_cast *>(&res0) = *from0; - *reinterpret_cast *>(&res1) = *from1; + *reinterpret_cast *>(&res0) = from0; + *reinterpret_cast *>(&res1) = from1; res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI); #endif return Packet2cf(res0); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, Index stride) +template<> EIGEN_ALWAYS_INLINE Packet2cf pload_ignore(const std::complex* from) { - EIGEN_ALIGN16 std::complex af[2]; - af[0] = from[0*stride]; - af[1] = from[1*stride]; - return pload(af); + Packet2cf res; + res.v = pload_ignore(reinterpret_cast(from)); + return res; } -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride) + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pgather_complex_size2(const Scalar* from, Index stride, const Index n = 2) { - EIGEN_ALIGN16 std::complex af[2]; - pstore >((std::complex *) af, from); - to[0*stride] = af[0]; - to[1*stride] = af[1]; + eigen_assert(n <= unpacket_traits::size && "number of elements will gather past end of packet"); + EIGEN_ALIGN16 Scalar af[2]; + for (Index i = 0; i < n; i++) { + af[i] = from[i*stride]; + } + return pload_ignore(af); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet2cf pgather, Packet2cf>(const std::complex* from, Index stride) +{ + return pgather_complex_size2, Packet2cf>(from, stride); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet2cf pgather_partial, Packet2cf>(const std::complex* from, Index stride, const Index n) +{ + return pgather_complex_size2, Packet2cf>(from, stride, n); +} +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_complex_size2(Scalar* to, const Packet& from, Index stride, const Index n = 2) +{ + eigen_assert(n <= unpacket_traits::size && "number of elements will scatter past end of packet"); + EIGEN_ALIGN16 Scalar af[2]; + pstore((Scalar *) af, from); + for (Index i = 0; i < n; i++) { + to[i*stride] = af[i]; + } +} +template<> EIGEN_DEVICE_FUNC 
EIGEN_ALWAYS_INLINE void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride) +{ + pscatter_complex_size2, Packet2cf>(to, from, stride); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride, const Index n) +{ + pscatter_complex_size2, Packet2cf>(to, from, stride, n); } template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); } @@ -184,7 +233,7 @@ template<> EIGEN_STRONG_INLINE std::complex pfirst(const Pack template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { Packet4f rev_a; - rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2); + rev_a = vec_sld(a.v, a.v, 8); return Packet2cf(rev_a); } @@ -210,10 +259,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for AltiVec - Packet2cf res = pmul(a, pconj(b)); - Packet4f s = pmul(b.v, b.v); - return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); + return pdiv_complex(a, b); } template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) @@ -223,8 +269,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { +#ifdef __VSX__ + Packet4f tmp = reinterpret_cast(vec_mergeh(reinterpret_cast(kernel.packet[0].v), reinterpret_cast(kernel.packet[1].v))); + kernel.packet[1].v = reinterpret_cast(vec_mergel(reinterpret_cast(kernel.packet[0].v), reinterpret_cast(kernel.packet[1].v))); +#else Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); +#endif kernel.packet[0].v = tmp; } @@ -320,6 +371,7 @@ template<> struct packet_traits > : default_packet_traits HasAbs2 = 0, HasMin = 0, HasMax = 0, + HasSqrt = 1, HasSetLinear = 0 }; }; @@ -328,17 +380,35 @@ template<> struct unpacket_traits { typedef std::complex type template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) { return Packet1cd(pload((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { return Packet1cd(ploadu((const double*)from)); } +template<> EIGEN_ALWAYS_INLINE Packet1cd pload_partial(const std::complex* from, const Index n, const Index offset) +{ + return Packet1cd(pload_partial((const double*)from, n * 2, offset * 2)); +} +template<> EIGEN_ALWAYS_INLINE Packet1cd ploadu_partial(const std::complex* from, const Index n) +{ + return Packet1cd(ploadu_partial((const double*)from, n * 2)); +} template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { pstoreu((double*)to, from.v); } +template<> EIGEN_ALWAYS_INLINE void pstore_partial >(std::complex * to, const Packet1cd& from, const Index n, const Index offset) { pstore_partial((double*)to, from.v, n * 2, offset * 2); } +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial >(std::complex * to, const Packet1cd& from, const Index n) { pstoreu_partial((double*)to, from.v, n * 2); } template<> EIGEN_STRONG_INLINE Packet1cd pset1(const std::complex& from) { /* here we really have to use unaligned loads :( */ return ploadu(&from); } -template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather, Packet1cd>(const std::complex* 
from, Index) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet1cd pgather, Packet1cd>(const std::complex* from, Index) { return pload(from); } -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet1cd>(std::complex* to, const Packet1cd& from, Index) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet1cd pgather_partial, Packet1cd>(const std::complex* from, Index, const Index) +{ + return pload(from); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter, Packet1cd>(std::complex* to, const Packet1cd& from, Index) +{ + pstore >(to, from); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial, Packet1cd>(std::complex* to, const Packet1cd& from, Index, const Index) { pstore >(to, from); } @@ -359,7 +429,7 @@ template<> EIGEN_STRONG_INLINE void prefetch >(const std::c template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { - EIGEN_ALIGN16 std::complex res[2]; + EIGEN_ALIGN16 std::complex res[1]; pstore >(res, a); return res[0]; @@ -375,10 +445,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { - // TODO optimize it for AltiVec - Packet1cd res = pmul(a,pconj(b)); - Packet2d s = pmul(b.v, b.v); - return Packet1cd(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_REVERSE64)))); + return pdiv_complex(a, b); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) @@ -388,8 +455,8 @@ EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); - kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); + Packet2d tmp = vec_mergeh(kernel.packet[0].v, kernel.packet[1].v); + kernel.packet[1].v = vec_mergel(kernel.packet[0].v, kernel.packet[1].v); kernel.packet[0].v = tmp; } diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h index 3a7a329..6f48d98 100644 --- a/libs/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -12,73 +12,117 @@ #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog(const Packet4f& _x) { return plog_float(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp(const Packet4f& _x) { return pexp_float(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin(const Packet4f& _x) { return psin_float(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos(const Packet4f& _x) { return pcos_float(_x); } +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f pacos(const Packet4f& _x) +{ + return pacos_float(_x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f pasin(const Packet4f& _x) +{ + return pasin_float(_x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f patan(const 
Packet4f& _x) +{ + return patan_float(_x); +} + +#ifdef __VSX__ #ifndef EIGEN_COMP_CLANG -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f prsqrt(const Packet4f& x) { return vec_rsqrt(x); } -#endif -#ifdef __VSX__ -#ifndef EIGEN_COMP_CLANG -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d prsqrt(const Packet2d& x) { return vec_rsqrt(x); } + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet2d patan(const Packet2d& _x) +{ + return patan_double(_x); +} #endif -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psqrt(const Packet4f& x) { return vec_sqrt(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d psqrt(const Packet2d& x) { return vec_sqrt(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& _x) { return pexp_double(_x); } + +template<> EIGEN_STRONG_INLINE Packet8bf psqrt (const Packet8bf& a){ + BF16_TO_F32_UNARY_OP_WRAPPER(psqrt, a); +} + +#ifndef EIGEN_COMP_CLANG +template<> EIGEN_STRONG_INLINE Packet8bf prsqrt (const Packet8bf& a){ + BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt, a); +} +#endif +#else +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f psqrt(const Packet4f& x) +{ + Packet4f a; + for (Index i = 0; i < packet_traits::size; i++) { + a[i] = numext::sqrt(x[i]); + } + return a; +} #endif // Hyperbolic Tangent function. template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h index 3f79b97..2429c81 100644 --- a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h @@ -17,24 +17,35 @@ #include "MatrixProductCommon.h" -// Since LLVM doesn't support dynamic dispatching, force either always MMA or VSX -#if EIGEN_COMP_LLVM -#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY) -#ifdef __MMA__ -#define EIGEN_ALTIVEC_MMA_ONLY -#else -#define EIGEN_ALTIVEC_DISABLE_MMA -#endif -#endif +#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) +#define EIGEN_ALTIVEC_DISABLE_MMA 0 #endif -#ifdef __has_builtin +// Check for MMA builtin support. +#if !EIGEN_ALTIVEC_DISABLE_MMA && defined(__has_builtin) #if __has_builtin(__builtin_mma_assemble_acc) - #define ALTIVEC_MMA_SUPPORT + #define EIGEN_ALTIVEC_MMA_SUPPORT #endif #endif -#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) +// Check if and how we should actually use MMA if supported. +#if defined(EIGEN_ALTIVEC_MMA_SUPPORT) + +#if !defined(EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH) +#define EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH 0 +#endif + +// Check if we want to enable dynamic dispatch. Not supported by LLVM. +#if EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH && !EIGEN_COMP_LLVM +#define EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH 1 +// Otherwise, use MMA by default if available. 
+#elif defined(__MMA__) +#define EIGEN_ALTIVEC_MMA_ONLY 1 +#endif + +#endif // EIGEN_ALTIVEC_MMA_SUPPORT + +#if defined(EIGEN_ALTIVEC_MMA_ONLY) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) #include "MatrixProductMMA.h" #endif @@ -43,6 +54,8 @@ * - Check StorageOrder on dhs_pack (the innermost second loop seems unvectorized when it could be). * * - Check the possibility of transposing as GETREAL and GETIMAG when needed. * **************************************************************************************************/ +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -78,6 +91,20 @@ struct quad_traits }; }; +template<> +struct quad_traits +{ + typedef Packet8bf vectortype; + typedef PacketBlock type; + typedef vectortype rhstype; + enum + { + vectorsize = packet_traits::size, + size = 8, + rows = 4 + }; +}; + // MatrixProduct decomposes real/imaginary vectors into a real vector and an imaginary vector; this turned out // to be faster than Eigen's usual approach of having real/imaginary pairs on a single vector. These constants are then // responsible for extracting from, and converting between, Eigen's layout and the MatrixProduct one. @@ -91,12 +118,6 @@ const static Packet16uc p16uc_GETIMAG32 = { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}; -const static Packet16uc p16uc_GETREAL64 = { 0, 1, 2, 3, 4, 5, 6, 7, - 16, 17, 18, 19, 20, 21, 22, 23}; - -//[a,ai],[b,bi] = [ai,bi] -const static Packet16uc p16uc_GETIMAG64 = { 8, 9, 10, 11, 12, 13, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31}; /********************************************* * Single precision real and complex packing * @@ -116,7 +137,7 @@ const static Packet16uc p16uc_GETIMAG64 = { 8, 9, 10, 11, 12, 13, 14, 15, * reason why packing for complex is broken down into several different parts, also the reason why we end up having a * float32/64 and complex float32/64 version.
**/ -template +template EIGEN_ALWAYS_INLINE std::complex getAdjointVal(Index i, Index j, const_blas_data_mapper, Index, StorageOrder>& dt) { std::complex v; @@ -135,7 +156,7 @@ EIGEN_ALWAYS_INLINE std::complex getAdjointVal(Index i, Index j, const_b return v; } -template +template EIGEN_STRONG_INLINE void symm_pack_complex_rhs_helper(std::complex* blockB, const std::complex* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { const Index depth = k2 + rows; @@ -153,7 +174,7 @@ EIGEN_STRONG_INLINE void symm_pack_complex_rhs_helper(std::complex* bloc { for(Index k = 0; k < vectorSize; k++) { - std::complex v = getAdjointVal(i, j + k, rhs); + std::complex v = getAdjointVal(i, j + k, rhs); blockBf[rir + k] = v.real(); blockBf[rii + k] = v.imag(); @@ -164,35 +185,34 @@ EIGEN_STRONG_INLINE void symm_pack_complex_rhs_helper(std::complex* bloc rir += vectorDelta; } - if (j < cols) + + for(; j < cols; j++) { - rii = rir + ((cols - j) * rows); + rii = rir + rows; for(Index i = k2; i < depth; i++) { - Index k = j; - for(; k < cols; k++) - { - std::complex v = getAdjointVal(i, k, rhs); + std::complex v = getAdjointVal(i, j, rhs); - blockBf[rir] = v.real(); - blockBf[rii] = v.imag(); + blockBf[rir] = v.real(); + blockBf[rii] = v.imag(); - rir += 1; - rii += 1; - } + rir += 1; + rii += 1; } + + rir += rows; } } -template +template EIGEN_STRONG_INLINE void symm_pack_complex_lhs_helper(std::complex* blockA, const std::complex* _lhs, Index lhsStride, Index cols, Index rows) { const Index depth = cols; const_blas_data_mapper, Index, StorageOrder> lhs(_lhs, lhsStride); const Index vectorSize = quad_traits::vectorsize; const Index vectorDelta = vectorSize * depth; - Scalar* blockAf = (Scalar *)(blockA); + Scalar* blockAf = reinterpret_cast(blockA); Index rir = 0, rii, j = 0; for(; j + vectorSize <= rows; j+=vectorSize) @@ -203,7 +223,7 @@ EIGEN_STRONG_INLINE void symm_pack_complex_lhs_helper(std::complex* bloc { for(Index k = 0; k < vectorSize; k++) { - std::complex v = getAdjointVal(j+k, i, lhs); + std::complex v = getAdjointVal(j+k, i, lhs); blockAf[rir + k] = v.real(); blockAf[rii + k] = v.imag(); @@ -224,7 +244,7 @@ EIGEN_STRONG_INLINE void symm_pack_complex_lhs_helper(std::complex* bloc Index k = j; for(; k < rows; k++) { - std::complex v = getAdjointVal(k, i, lhs); + std::complex v = getAdjointVal(k, i, lhs); blockAf[rir] = v.real(); blockAf[rii] = v.imag(); @@ -236,7 +256,7 @@ EIGEN_STRONG_INLINE void symm_pack_complex_lhs_helper(std::complex* bloc } } -template +template EIGEN_STRONG_INLINE void symm_pack_rhs_helper(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { const Index depth = k2 + rows; @@ -260,24 +280,20 @@ EIGEN_STRONG_INLINE void symm_pack_rhs_helper(Scalar* blockB, const Scalar* _rhs } } - if (j < cols) + for(; j < cols; j++) { for(Index i = k2; i < depth; i++) { - Index k = j; - for(; k < cols; k++) - { - if(k <= i) - blockB[ri] = rhs(i, k); - else - blockB[ri] = rhs(k, i); - ri += 1; - } + if(j <= i) + blockB[ri] = rhs(i, j); + else + blockB[ri] = rhs(j, i); + ri += 1; } } } -template +template EIGEN_STRONG_INLINE void symm_pack_lhs_helper(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows) { const Index depth = cols; @@ -324,7 +340,7 @@ struct symm_pack_rhs, Index, nr, StorageOrder> { void operator()(std::complex* blockB, const std::complex* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { - symm_pack_complex_rhs_helper(blockB, _rhs, rhsStride, rows, cols, k2); + symm_pack_complex_rhs_helper(blockB, _rhs, 
rhsStride, rows, cols, k2); } }; @@ -333,7 +349,7 @@ struct symm_pack_lhs, Index, Pack1, Pack2_dummy, StorageOrde { void operator()(std::complex* blockA, const std::complex* _lhs, Index lhsStride, Index cols, Index rows) { - symm_pack_complex_lhs_helper(blockA, _lhs, lhsStride, cols, rows); + symm_pack_complex_lhs_helper(blockA, _lhs, lhsStride, cols, rows); } }; @@ -344,7 +360,7 @@ struct symm_pack_rhs, Index, nr, StorageOrder> { void operator()(std::complex* blockB, const std::complex* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { - symm_pack_complex_rhs_helper(blockB, _rhs, rhsStride, rows, cols, k2); + symm_pack_complex_rhs_helper(blockB, _rhs, rhsStride, rows, cols, k2); } }; @@ -353,7 +369,7 @@ struct symm_pack_lhs, Index, Pack1, Pack2_dummy, StorageOrd { void operator()(std::complex* blockA, const std::complex* _lhs, Index lhsStride, Index cols, Index rows) { - symm_pack_complex_lhs_helper(blockA, _lhs, lhsStride, cols, rows); + symm_pack_complex_lhs_helper(blockA, _lhs, lhsStride, cols, rows); } }; @@ -363,7 +379,7 @@ struct symm_pack_rhs { void operator()(float* blockB, const float* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { - symm_pack_rhs_helper(blockB, _rhs, rhsStride, rows, cols, k2); + symm_pack_rhs_helper(blockB, _rhs, rhsStride, rows, cols, k2); } }; @@ -372,7 +388,7 @@ struct symm_pack_lhs { void operator()(float* blockA, const float* _lhs, Index lhsStride, Index cols, Index rows) { - symm_pack_lhs_helper(blockA, _lhs, lhsStride, cols, rows); + symm_pack_lhs_helper(blockA, _lhs, lhsStride, cols, rows); } }; @@ -382,7 +398,7 @@ struct symm_pack_rhs { void operator()(double* blockB, const double* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { - symm_pack_rhs_helper(blockB, _rhs, rhsStride, rows, cols, k2); + symm_pack_rhs_helper(blockB, _rhs, rhsStride, rows, cols, k2); } }; @@ -391,7 +407,7 @@ struct symm_pack_lhs { void operator()(double* blockA, const double* _lhs, Index lhsStride, Index cols, Index rows) { - symm_pack_lhs_helper(blockA, _lhs, lhsStride, cols, rows); + symm_pack_lhs_helper(blockA, _lhs, lhsStride, cols, rows); } }; @@ -406,26 +422,22 @@ struct symm_pack_lhs * and offset and behaves accordingly. **/ -template -EIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock& block) -{ - const Index size = 16 / sizeof(Scalar); - pstore(to + (0 * size), block.packet[0]); - pstore(to + (1 * size), block.packet[1]); - pstore(to + (2 * size), block.packet[2]); - pstore(to + (3 * size), block.packet[3]); -} - -template -EIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock& block) +template +EIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock& block) { const Index size = 16 / sizeof(Scalar); pstore(to + (0 * size), block.packet[0]); pstore(to + (1 * size), block.packet[1]); + if (N > 2) { + pstore(to + (2 * size), block.packet[2]); + } + if (N > 3) { + pstore(to + (3 * size), block.packet[3]); + } } // General template for lhs & rhs complex packing. -template +template struct dhs_cpack { EIGEN_STRONG_INLINE void operator()(std::complex* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { @@ -437,6 +449,7 @@ struct dhs_cpack { for(; j + vectorSize <= rows; j+=vectorSize) { + const DataMapper lhs2 = UseLhs ? 
lhs.getSubMapper(j, 0) : lhs.getSubMapper(0, j); Index i = 0; rii = rir + vectorDelta; @@ -447,9 +460,9 @@ struct dhs_cpack { PacketBlock cblock; if (UseLhs) { - bload(cblock, lhs, j, i); + bload(cblock, lhs2, 0, i); } else { - bload(cblock, lhs, i, j); + bload(cblock, lhs2, i, 0); } blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETREAL32); @@ -476,8 +489,8 @@ struct dhs_cpack { ptranspose(blocki); } - storeBlock(blockAt + rir, blockr); - storeBlock(blockAt + rii, blocki); + storeBlock(blockAt + rir, blockr); + storeBlock(blockAt + rii, blocki); rir += 4*vectorSize; rii += 4*vectorSize; @@ -490,28 +503,19 @@ struct dhs_cpack { if(((StorageOrder == ColMajor) && UseLhs) || (((StorageOrder == RowMajor) && !UseLhs))) { if (UseLhs) { - cblock.packet[0] = lhs.template loadPacket(j + 0, i); - cblock.packet[1] = lhs.template loadPacket(j + 2, i); + cblock.packet[0] = lhs2.template loadPacket(0, i); + cblock.packet[1] = lhs2.template loadPacket(2, i); } else { - cblock.packet[0] = lhs.template loadPacket(i, j + 0); - cblock.packet[1] = lhs.template loadPacket(i, j + 2); + cblock.packet[0] = lhs2.template loadPacket(i, 0); + cblock.packet[1] = lhs2.template loadPacket(i, 2); } } else { - std::complex lhs0, lhs1; if (UseLhs) { - lhs0 = lhs(j + 0, i); - lhs1 = lhs(j + 1, i); - cblock.packet[0] = pload2(&lhs0, &lhs1); - lhs0 = lhs(j + 2, i); - lhs1 = lhs(j + 3, i); - cblock.packet[1] = pload2(&lhs0, &lhs1); + cblock.packet[0] = pload2(lhs2(0, i), lhs2(1, i)); + cblock.packet[1] = pload2(lhs2(2, i), lhs2(3, i)); } else { - lhs0 = lhs(i, j + 0); - lhs1 = lhs(i, j + 1); - cblock.packet[0] = pload2(&lhs0, &lhs1); - lhs0 = lhs(i, j + 2); - lhs1 = lhs(i, j + 3); - cblock.packet[1] = pload2(&lhs0, &lhs1); + cblock.packet[0] = pload2(lhs2(i, 0), lhs2(i, 1)); + cblock.packet[1] = pload2(lhs2(i, 2), lhs2(i, 3)); } } @@ -533,34 +537,51 @@ struct dhs_cpack { rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta); } - if (j < rows) + if (!UseLhs) { - if(PanelMode) rir += (offset*(rows - j - vectorSize)); - rii = rir + (((PanelMode) ? stride : depth) * (rows - j)); + if(PanelMode) rir -= (offset*(vectorSize - 1)); - for(Index i = 0; i < depth; i++) + for(; j < rows; j++) { - Index k = j; - for(; k < rows; k++) + const DataMapper lhs2 = lhs.getSubMapper(0, j); + rii = rir + ((PanelMode) ? stride : depth); + + for(Index i = 0; i < depth; i++) { - if (UseLhs) { + blockAt[rir] = lhs2(i, 0).real(); + + if(Conjugate) + blockAt[rii] = -lhs2(i, 0).imag(); + else + blockAt[rii] = lhs2(i, 0).imag(); + + rir += 1; + rii += 1; + } + + rir += ((PanelMode) ? (2*stride - depth) : depth); + } + } else { + if (j < rows) + { + if(PanelMode) rir += (offset*(rows - j - vectorSize)); + rii = rir + (((PanelMode) ? stride : depth) * (rows - j)); + + for(Index i = 0; i < depth; i++) + { + Index k = j; + for(; k < rows; k++) + { blockAt[rir] = lhs(k, i).real(); if(Conjugate) blockAt[rii] = -lhs(k, i).imag(); else blockAt[rii] = lhs(k, i).imag(); - } else { - blockAt[rir] = lhs(i, k).real(); - if(Conjugate) - blockAt[rii] = -lhs(i, k).imag(); - else - blockAt[rii] = lhs(i, k).imag(); + rir += 1; + rii += 1; } - - rir += 1; - rii += 1; } } } @@ -568,7 +589,7 @@ struct dhs_cpack { }; // General template for lhs & rhs packing. 
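
// --- Illustrative sketch (not from the patch) ---
// The storeBlock helper above replaces the old fixed 4-packet and 2-packet
// overloads with a single version templated on the packet count N; because N
// is a compile-time constant, the `if (N > 2)` / `if (N > 3)` guards cost
// nothing and the unused stores drop out. A scalar-flavoured sketch of the
// same pattern, with hypothetical names and memcpy standing in for pstore;
// the general lhs/rhs packing template follows after this sketch.
#include <cstring>

template <int N>
inline void store_block_sketch(float* to, const float (&block)[4][4]) {
  std::memcpy(to + 0, block[0], sizeof(block[0]));  // packets 0 and 1 always
  std::memcpy(to + 4, block[1], sizeof(block[1]));
  if (N > 2) std::memcpy(to + 8,  block[2], sizeof(block[2]));  // folded away when N == 2
  if (N > 3) std::memcpy(to + 12, block[3], sizeof(block[3]));
}
// --- End sketch ---
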
-template +template struct dhs_pack{ EIGEN_STRONG_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { @@ -577,6 +598,7 @@ struct dhs_pack{ for(; j + vectorSize <= rows; j+=vectorSize) { + const DataMapper lhs2 = UseLhs ? lhs.getSubMapper(j, 0) : lhs.getSubMapper(0, j); Index i = 0; if(PanelMode) ri += vectorSize*offset; @@ -586,16 +608,16 @@ struct dhs_pack{ PacketBlock block; if (UseLhs) { - bload(block, lhs, j, i); + bload(block, lhs2, 0, i); } else { - bload(block, lhs, i, j); + bload(block, lhs2, i, 0); } if(((StorageOrder == RowMajor) && UseLhs) || ((StorageOrder == ColMajor) && !UseLhs)) { ptranspose(block); } - storeBlock(blockA + ri, block); + storeBlock(blockA + ri, block); ri += 4*vectorSize; } @@ -604,22 +626,22 @@ struct dhs_pack{ if(((StorageOrder == RowMajor) && UseLhs) || ((StorageOrder == ColMajor) && !UseLhs)) { if (UseLhs) { - blockA[ri+0] = lhs(j+0, i); - blockA[ri+1] = lhs(j+1, i); - blockA[ri+2] = lhs(j+2, i); - blockA[ri+3] = lhs(j+3, i); + blockA[ri+0] = lhs2(0, i); + blockA[ri+1] = lhs2(1, i); + blockA[ri+2] = lhs2(2, i); + blockA[ri+3] = lhs2(3, i); } else { - blockA[ri+0] = lhs(i, j+0); - blockA[ri+1] = lhs(i, j+1); - blockA[ri+2] = lhs(i, j+2); - blockA[ri+3] = lhs(i, j+3); + blockA[ri+0] = lhs2(i, 0); + blockA[ri+1] = lhs2(i, 1); + blockA[ri+2] = lhs2(i, 2); + blockA[ri+3] = lhs2(i, 3); } } else { Packet lhsV; if (UseLhs) { - lhsV = lhs.template loadPacket(j, i); + lhsV = lhs2.template loadPacket(0, i); } else { - lhsV = lhs.template loadPacket(i, j); + lhsV = lhs2.template loadPacket(i, 0); } pstore(blockA + ri, lhsV); } @@ -630,30 +652,43 @@ struct dhs_pack{ if(PanelMode) ri += vectorSize*(stride - offset - depth); } - if (j < rows) + if (!UseLhs) { - if(PanelMode) ri += offset*(rows - j); + if(PanelMode) ri += offset; - for(Index i = 0; i < depth; i++) + for(; j < rows; j++) { - Index k = j; - for(; k < rows; k++) + const DataMapper lhs2 = lhs.getSubMapper(0, j); + for(Index i = 0; i < depth; i++) { - if (UseLhs) { - blockA[ri] = lhs(k, i); - } else { - blockA[ri] = lhs(i, k); - } + blockA[ri] = lhs2(i, 0); ri += 1; } + + if(PanelMode) ri += stride - depth; + } + } else { + if (j < rows) + { + if(PanelMode) ri += offset*(rows - j); + + for(Index i = 0; i < depth; i++) + { + Index k = j; + for(; k < rows; k++) + { + blockA[ri] = lhs(k, i); + ri += 1; + } + } } } } }; // General template for lhs packing, float64 specialization. 
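
// --- Illustrative sketch (not from the patch) ---
// The layout all of these dhs_pack variants produce: the panel is rewritten
// so that `vectorsize` consecutive rows are interleaved per depth step,
// giving the GEMM microkernel unit-stride packet loads. A plain-C++ sketch
// for a column-major lhs with vectorsize == 4, ignoring panel mode, stride
// and offset (function and parameter names hypothetical); the float64 lhs
// specialization follows.
inline void pack_lhs_sketch(float* blockA, const float* lhs, int lhsStride,
                            int depth, int rows) {
  int ri = 0, j = 0;
  for (; j + 4 <= rows; j += 4)    // full 4-row panels
    for (int i = 0; i < depth; i++)
      for (int k = 0; k < 4; k++)
        blockA[ri++] = lhs[(j + k) + i * lhsStride];
  if (j < rows)                    // leftover rows, still interleaved per depth step
    for (int i = 0; i < depth; i++)
      for (int k = j; k < rows; k++)
        blockA[ri++] = lhs[k + i * lhsStride];
}
// --- End sketch ---
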
-template -struct dhs_pack +template +struct dhs_pack { EIGEN_STRONG_INLINE void operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { @@ -662,6 +697,7 @@ struct dhs_pack block; if(StorageOrder == RowMajor) { - block.packet[0] = lhs.template loadPacket(j + 0, i); - block.packet[1] = lhs.template loadPacket(j + 1, i); + block.packet[0] = lhs2.template loadPacket(0, i); + block.packet[1] = lhs2.template loadPacket(1, i); ptranspose(block); } else { - block.packet[0] = lhs.template loadPacket(j, i + 0); - block.packet[1] = lhs.template loadPacket(j, i + 1); + block.packet[0] = lhs2.template loadPacket(0, i + 0); + block.packet[1] = lhs2.template loadPacket(0, i + 1); } - storeBlock(blockA + ri, block); + storeBlock(blockA + ri, block); ri += 2*vectorSize; } @@ -688,10 +724,10 @@ struct dhs_pack(j, i); + Packet2d lhsV = lhs2.template loadPacket(0, i); pstore(blockA + ri, lhsV); } @@ -719,8 +755,8 @@ struct dhs_pack -struct dhs_pack +template +struct dhs_pack { EIGEN_STRONG_INLINE void operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { @@ -729,6 +765,7 @@ struct dhs_pack block1, block2; - block1.packet[0] = rhs.template loadPacket(i, j + 0); - block1.packet[1] = rhs.template loadPacket(i, j + 1); - block2.packet[0] = rhs.template loadPacket(i, j + 2); - block2.packet[1] = rhs.template loadPacket(i, j + 3); + block1.packet[0] = rhs2.template loadPacket(i, 0); + block1.packet[1] = rhs2.template loadPacket(i, 1); + block2.packet[0] = rhs2.template loadPacket(i, 2); + block2.packet[1] = rhs2.template loadPacket(i, 3); ptranspose(block1); ptranspose(block2); @@ -752,12 +789,12 @@ struct dhs_pack(blockB + ri + 4, block1.packet[1]); pstore(blockB + ri + 6, block2.packet[1]); } else { - block.packet[0] = rhs.template loadPacket(i + 0, j + 0); //[a1 a2] - block.packet[1] = rhs.template loadPacket(i + 0, j + 2); //[a3 a4] - block.packet[2] = rhs.template loadPacket(i + 1, j + 0); //[b1 b2] - block.packet[3] = rhs.template loadPacket(i + 1, j + 2); //[b3 b4] + block.packet[0] = rhs2.template loadPacket(i + 0, 0); //[a1 a2] + block.packet[1] = rhs2.template loadPacket(i + 0, 2); //[a3 a4] + block.packet[2] = rhs2.template loadPacket(i + 1, 0); //[b1 b2] + block.packet[3] = rhs2.template loadPacket(i + 1, 2); //[b3 b4] - storeBlock(blockB + ri, block); + storeBlock(blockB + ri, block); } ri += 4*vectorSize; @@ -766,20 +803,20 @@ struct dhs_pack(i, j); + Packet2d rhsV = rhs2.template loadPacket(i, 0); pstore(blockB + ri, rhsV); ri += vectorSize; - rhsV = rhs.template loadPacket(i, j + 2); + rhsV = rhs2.template loadPacket(i, 2); pstore(blockB + ri, rhsV); } ri += vectorSize; @@ -788,26 +825,25 @@ struct dhs_pack -struct dhs_cpack +template +struct dhs_cpack { EIGEN_STRONG_INLINE void operator()(std::complex* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { @@ -819,6 +855,7 @@ struct dhs_cpack(j, i + 0); //[a1 a1i] - cblock.packet[1] = lhs.template loadPacket(j, i + 1); //[b1 b1i] + cblock.packet[0] = lhs2.template loadPacket(0, i + 0); //[a1 a1i] + cblock.packet[1] = lhs2.template loadPacket(0, i + 1); //[b1 b1i] - cblock.packet[2] = lhs.template loadPacket(j + 1, i + 0); //[a2 a2i] - cblock.packet[3] = lhs.template loadPacket(j + 1, i + 1); //[b2 b2i] + cblock.packet[2] = lhs2.template loadPacket(1, i + 0); //[a2 a2i] + cblock.packet[3] = lhs2.template loadPacket(1, i + 1); //[b2 b2i] - blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, 
p16uc_GETREAL64); //[a1 a2] - blockr.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETREAL64); //[b1 b2] + blockr.packet[0] = vec_mergeh(cblock.packet[0].v, cblock.packet[2].v); //[a1 a2] + blockr.packet[1] = vec_mergeh(cblock.packet[1].v, cblock.packet[3].v); //[b1 b2] - blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, p16uc_GETIMAG64); - blocki.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETIMAG64); + blocki.packet[0] = vec_mergel(cblock.packet[0].v, cblock.packet[2].v); + blocki.packet[1] = vec_mergel(cblock.packet[1].v, cblock.packet[3].v); } else { - cblock.packet[0] = lhs.template loadPacket(j + 0, i); //[a1 a1i] - cblock.packet[1] = lhs.template loadPacket(j + 1, i); //[a2 a2i] + cblock.packet[0] = lhs2.template loadPacket(0, i); //[a1 a1i] + cblock.packet[1] = lhs2.template loadPacket(1, i); //[a2 a2i] - cblock.packet[2] = lhs.template loadPacket(j + 0, i + 1); //[b1 b1i] - cblock.packet[3] = lhs.template loadPacket(j + 1, i + 1); //[b2 b2i + cblock.packet[2] = lhs2.template loadPacket(0, i + 1); //[b1 b1i] + cblock.packet[3] = lhs2.template loadPacket(1, i + 1); //[b2 b2i - blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64); //[a1 a2] - blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64); //[b1 b2] + blockr.packet[0] = vec_mergeh(cblock.packet[0].v, cblock.packet[1].v); //[a1 a2] + blockr.packet[1] = vec_mergeh(cblock.packet[2].v, cblock.packet[3].v); //[b1 b2] - blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64); - blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64); + blocki.packet[0] = vec_mergel(cblock.packet[0].v, cblock.packet[1].v); + blocki.packet[1] = vec_mergel(cblock.packet[2].v, cblock.packet[3].v); } if(Conjugate) @@ -861,8 +898,8 @@ struct dhs_cpack(blockAt + rir, blockr); - storeBlock(blockAt + rii, blocki); + storeBlock(blockAt + rir, blockr); + storeBlock(blockAt + rii, blocki); rir += 2*vectorSize; rii += 2*vectorSize; @@ -872,11 +909,11 @@ struct dhs_cpack blockr, blocki; PacketBlock cblock; - cblock.packet[0] = lhs.template loadPacket(j + 0, i); - cblock.packet[1] = lhs.template loadPacket(j + 1, i); + cblock.packet[0] = lhs2.template loadPacket(0, i); + cblock.packet[1] = lhs2.template loadPacket(1, i); - blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64); - blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64); + blockr.packet[0] = vec_mergeh(cblock.packet[0].v, cblock.packet[1].v); + blocki.packet[0] = vec_mergel(cblock.packet[0].v, cblock.packet[1].v); if(Conjugate) { @@ -919,8 +956,8 @@ struct dhs_cpack -struct dhs_cpack +template +struct dhs_cpack { EIGEN_STRONG_INLINE void operator()(std::complex* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { @@ -932,6 +969,7 @@ struct dhs_cpack cblock; PacketBlock blockr, blocki; - bload(cblock, rhs, i, j); + bload(cblock, rhs2, i, 0); - blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64); - blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64); + blockr.packet[0] = vec_mergeh(cblock.packet[0].v, cblock.packet[1].v); + blockr.packet[1] = vec_mergeh(cblock.packet[2].v, cblock.packet[3].v); - blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64); - blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64); 
+ blocki.packet[0] = vec_mergel(cblock.packet[0].v, cblock.packet[1].v); + blocki.packet[1] = vec_mergel(cblock.packet[2].v, cblock.packet[3].v); if(Conjugate) { @@ -955,8 +993,8 @@ struct dhs_cpack(blockBt + rir, blockr); - storeBlock(blockBt + rii, blocki); + storeBlock(blockBt + rir, blockr); + storeBlock(blockBt + rii, blocki); rir += 2*vectorSize; rii += 2*vectorSize; @@ -965,27 +1003,27 @@ struct dhs_cpack -EIGEN_ALWAYS_INLINE void pger_common(PacketBlock* acc, const Packet& lhsV, const Packet* rhsV) +template +EIGEN_ALWAYS_INLINE void pger_common(PacketBlock* acc, const Packet& lhsV, const Packet* rhsV) { if(NegativeAccumulate) { - acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]); - acc->packet[1] = vec_nmsub(lhsV, rhsV[1], acc->packet[1]); - acc->packet[2] = vec_nmsub(lhsV, rhsV[2], acc->packet[2]); - acc->packet[3] = vec_nmsub(lhsV, rhsV[3], acc->packet[3]); + for (int M = 0; M < N; M++) { + acc->packet[M] = vec_nmsub(lhsV, rhsV[M], acc->packet[M]); + } } else { - acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]); - acc->packet[1] = vec_madd(lhsV, rhsV[1], acc->packet[1]); - acc->packet[2] = vec_madd(lhsV, rhsV[2], acc->packet[2]); - acc->packet[3] = vec_madd(lhsV, rhsV[3], acc->packet[3]); - } -} - -template -EIGEN_ALWAYS_INLINE void pger_common(PacketBlock* acc, const Packet& lhsV, const Packet* rhsV) -{ - if(NegativeAccumulate) - { - acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]); - } else { - acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]); + for (int M = 0; M < N; M++) { + acc->packet[M] = vec_madd(lhsV, rhsV[M], acc->packet[M]); + } } } @@ -1028,559 +1053,628 @@ EIGEN_ALWAYS_INLINE void pger(PacketBlock* acc, const Scalar* lhs, con { Packet lhsV = pload(lhs); - pger_common(acc, lhsV, rhsV); -} - -template -EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs, Packet &lhsV, Index remaining_rows) -{ -#ifdef _ARCH_PWR9 - lhsV = vec_xl_len((Scalar *)lhs, remaining_rows * sizeof(Scalar)); -#else - Index i = 0; - do { - lhsV[i] = lhs[i]; - } while (++i < remaining_rows); -#endif -} - -template -EIGEN_ALWAYS_INLINE void pger(PacketBlock* acc, const Scalar* lhs, const Packet* rhsV, Index remaining_rows) -{ - Packet lhsV; - loadPacketRemaining(lhs, lhsV, remaining_rows); - - pger_common(acc, lhsV, rhsV); + pger_common(acc, lhsV, rhsV); } // 512-bits rank1-update of complex acc. It takes decoupled accumulators as entries. It also takes cares of mixed types real * complex and complex * real. 
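
// --- Illustrative sketch (not from the patch) ---
// The decoupled complex rank-1 update described in the comment above,
// written out for a single lane: with lhs = lr + i*li and rhs = rr + i*ri,
//   accReal += lr*rr - li*ri   (the subtraction is why vec_nmsub appears)
//   accImag += lr*ri + li*rr
// and conjugation merely flips the sign of li or ri. Scalar sketch with
// hypothetical names:
inline void pgerc_lane_sketch(float& accReal, float& accImag,
                              float lr, float li, float rr, float ri,
                              bool conjLhs, bool conjRhs) {
  if (conjLhs) li = -li;
  if (conjRhs) ri = -ri;
  accReal += lr * rr - li * ri;  // pger_common<..., false>, then <..., true>
  accImag += lr * ri + li * rr;  // two plain multiply-add chains
}
// --- End sketch ---
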
template -EIGEN_ALWAYS_INLINE void pgerc_common(PacketBlock* accReal, PacketBlock* accImag, const Packet &lhsV, const Packet &lhsVi, const Packet* rhsV, const Packet* rhsVi) +EIGEN_ALWAYS_INLINE void pgerc_common(PacketBlock* accReal, PacketBlock* accImag, const Packet &lhsV, Packet &lhsVi, const Packet* rhsV, const Packet* rhsVi) { - pger_common(accReal, lhsV, rhsV); + pger_common(accReal, lhsV, rhsV); if(LhsIsReal) { - pger_common(accImag, lhsV, rhsVi); + pger_common(accImag, lhsV, rhsVi); EIGEN_UNUSED_VARIABLE(lhsVi); } else { if (!RhsIsReal) { - pger_common(accReal, lhsVi, rhsVi); - pger_common(accImag, lhsV, rhsVi); + pger_common(accReal, lhsVi, rhsVi); + pger_common(accImag, lhsV, rhsVi); } else { EIGEN_UNUSED_VARIABLE(rhsVi); } - pger_common(accImag, lhsVi, rhsV); + pger_common(accImag, lhsVi, rhsV); } } template EIGEN_ALWAYS_INLINE void pgerc(PacketBlock* accReal, PacketBlock* accImag, const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, const Packet* rhsV, const Packet* rhsVi) { - Packet lhsV = ploadLhs(lhs_ptr); + Packet lhsV = ploadLhs(lhs_ptr); Packet lhsVi; - if(!LhsIsReal) lhsVi = ploadLhs(lhs_ptr_imag); + if(!LhsIsReal) lhsVi = ploadLhs(lhs_ptr_imag); else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); pgerc_common(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi); } -template -EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, Packet &lhsV, Packet &lhsVi, Index remaining_rows) -{ -#ifdef _ARCH_PWR9 - lhsV = vec_xl_len((Scalar *)lhs_ptr, remaining_rows * sizeof(Scalar)); - if(!LhsIsReal) lhsVi = vec_xl_len((Scalar *)lhs_ptr_imag, remaining_rows * sizeof(Scalar)); - else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); -#else - Index i = 0; - do { - lhsV[i] = lhs_ptr[i]; - if(!LhsIsReal) lhsVi[i] = lhs_ptr_imag[i]; - } while (++i < remaining_rows); - if(LhsIsReal) EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); -#endif -} - -template -EIGEN_ALWAYS_INLINE void pgerc(PacketBlock* accReal, PacketBlock* accImag, const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, const Packet* rhsV, const Packet* rhsVi, Index remaining_rows) -{ - Packet lhsV, lhsVi; - loadPacketRemaining(lhs_ptr, lhs_ptr_imag, lhsV, lhsVi, remaining_rows); - - pgerc_common(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi); -} - -template -EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs) +template +EIGEN_ALWAYS_INLINE Packet ploadLhs(const __UNPACK_TYPE__(Packet)* lhs) { return ploadu(lhs); } // Zero the accumulator on PacketBlock. -template -EIGEN_ALWAYS_INLINE void bsetzero(PacketBlock& acc) +template +EIGEN_ALWAYS_INLINE void bsetzero(PacketBlock& acc) { - acc.packet[0] = pset1((Scalar)0); - acc.packet[1] = pset1((Scalar)0); - acc.packet[2] = pset1((Scalar)0); - acc.packet[3] = pset1((Scalar)0); + for (int M = 0; M < N; M++) { + acc.packet[M] = pset1((__UNPACK_TYPE__(Packet))0); + } } -template -EIGEN_ALWAYS_INLINE void bsetzero(PacketBlock& acc) +template +EIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha) { - acc.packet[0] = pset1((Scalar)0); + for (int M = 0; M < N; M++) { + acc.packet[M] = vec_mul(accZ.packet[M], pAlpha); + } } -// Scale the PacketBlock vectors by alpha. 
-template -EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha) +template +EIGEN_ALWAYS_INLINE void band(PacketBlock& acc, const Packet& pMask) { - acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]); - acc.packet[1] = pmadd(pAlpha, accZ.packet[1], acc.packet[1]); - acc.packet[2] = pmadd(pAlpha, accZ.packet[2], acc.packet[2]); - acc.packet[3] = pmadd(pAlpha, accZ.packet[3], acc.packet[3]); -} - -template -EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha) -{ - acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]); -} - -template -EIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha) -{ - acc.packet[0] = pmul(accZ.packet[0], pAlpha); - acc.packet[1] = pmul(accZ.packet[1], pAlpha); - acc.packet[2] = pmul(accZ.packet[2], pAlpha); - acc.packet[3] = pmul(accZ.packet[3], pAlpha); -} - -template -EIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha) -{ - acc.packet[0] = pmul(accZ.packet[0], pAlpha); + for (int M = 0; M < N; M++) { + acc.packet[M] = pand(acc.packet[M], pMask); + } } // Complex version of PacketBlock scaling. -template -EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag) +template +EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag, const Packet& pMask) { - bscalec_common(cReal, aReal, bReal); + if (mask && (sizeof(__UNPACK_TYPE__(Packet)) == sizeof(float))) { + band(aReal, pMask); + band(aImag, pMask); + } else { + EIGEN_UNUSED_VARIABLE(pMask); + } - bscalec_common(cImag, aImag, bReal); + bscalec_common(cReal, aReal, bReal); - pger_common(&cReal, bImag, aImag.packet); + bscalec_common(cImag, aImag, bReal); - pger_common(&cImag, bImag, aReal.packet); -} + pger_common(&cReal, bImag, aImag.packet); -template -EIGEN_ALWAYS_INLINE void band(PacketBlock& acc, const Packet& pMask) -{ - acc.packet[0] = pand(acc.packet[0], pMask); - acc.packet[1] = pand(acc.packet[1], pMask); - acc.packet[2] = pand(acc.packet[2], pMask); - acc.packet[3] = pand(acc.packet[3], pMask); -} - -template -EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag, const Packet& pMask) -{ - band(aReal, pMask); - band(aImag, pMask); - - bscalec(aReal, aImag, bReal, bImag, cReal, cImag); + pger_common(&cImag, bImag, aReal.packet); } // Load a PacketBlock, the N parameters make tuning gemm easier so we can add more accumulators as needed.
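
// --- Illustrative sketch (not from the patch) ---
// bload (below) pulls the current tile of the result matrix into registers
// before it is scaled by alpha: one packet per row in RowMajor order or per
// column in ColMajor order, plus a second set of packets at row + accCols
// when the decoupled complex halves are needed. A plain-array sketch of the
// ColMajor real case with packet width 4 (names hypothetical):
template <int N>
inline void bload_sketch(float acc[][4], const float* res, int resStride,
                         int row, int col) {
  for (int M = 0; M < N; M++)        // one column packet per accumulator
    for (int lane = 0; lane < 4; lane++)
      acc[M][lane] = res[(row + lane) + (col + M) * resStride];
}
// --- End sketch ---
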
-template -EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col) +// +// full = operate (load) on the entire PacketBlock or only half +template +EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col) { if (StorageOrder == RowMajor) { - acc.packet[0] = res.template loadPacket(row + 0, col + N*accCols); - acc.packet[1] = res.template loadPacket(row + 1, col + N*accCols); - acc.packet[2] = res.template loadPacket(row + 2, col + N*accCols); - acc.packet[3] = res.template loadPacket(row + 3, col + N*accCols); + for (int M = 0; M < N; M++) { + acc.packet[M] = res.template loadPacket(row + M, col); + } + if (Complex) { + for (int M = 0; M < N; M++) { + acc.packet[M+N] = res.template loadPacket(row + M, col + accCols); + } + } } else { - acc.packet[0] = res.template loadPacket(row + N*accCols, col + 0); - acc.packet[1] = res.template loadPacket(row + N*accCols, col + 1); - acc.packet[2] = res.template loadPacket(row + N*accCols, col + 2); - acc.packet[3] = res.template loadPacket(row + N*accCols, col + 3); - } -} - -// An overload of bload when you have a PacketBLock with 8 vectors. -template -EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col) -{ - if (StorageOrder == RowMajor) { - acc.packet[0] = res.template loadPacket(row + 0, col + N*accCols); - acc.packet[1] = res.template loadPacket(row + 1, col + N*accCols); - acc.packet[2] = res.template loadPacket(row + 2, col + N*accCols); - acc.packet[3] = res.template loadPacket(row + 3, col + N*accCols); - acc.packet[4] = res.template loadPacket(row + 0, col + (N+1)*accCols); - acc.packet[5] = res.template loadPacket(row + 1, col + (N+1)*accCols); - acc.packet[6] = res.template loadPacket(row + 2, col + (N+1)*accCols); - acc.packet[7] = res.template loadPacket(row + 3, col + (N+1)*accCols); - } else { - acc.packet[0] = res.template loadPacket(row + N*accCols, col + 0); - acc.packet[1] = res.template loadPacket(row + N*accCols, col + 1); - acc.packet[2] = res.template loadPacket(row + N*accCols, col + 2); - acc.packet[3] = res.template loadPacket(row + N*accCols, col + 3); - acc.packet[4] = res.template loadPacket(row + (N+1)*accCols, col + 0); - acc.packet[5] = res.template loadPacket(row + (N+1)*accCols, col + 1); - acc.packet[6] = res.template loadPacket(row + (N+1)*accCols, col + 2); - acc.packet[7] = res.template loadPacket(row + (N+1)*accCols, col + 3); - } -} - -template -EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col) -{ - acc.packet[0] = res.template loadPacket(row + N*accCols, col + 0); - acc.packet[1] = res.template loadPacket(row + (N+1)*accCols, col + 0); -} - -const static Packet4i mask41 = { -1, 0, 0, 0 }; -const static Packet4i mask42 = { -1, -1, 0, 0 }; -const static Packet4i mask43 = { -1, -1, -1, 0 }; - -const static Packet2l mask21 = { -1, 0 }; - -template -EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows) -{ - if (remaining_rows == 0) { - return pset1(float(0.0)); // Not used - } else { - switch (remaining_rows) { - case 1: return Packet(mask41); - case 2: return Packet(mask42); - default: return Packet(mask43); + for (int M = 0; M < N; M++) { + acc.packet[M] = res.template loadPacket(row, col + M); + } + if (Complex && full) { + for (int M = 0; M < N; M++) { + acc.packet[M+N] = res.template loadPacket(row + accCols, col + M); + } } } } -template<> -EIGEN_ALWAYS_INLINE Packet2d bmask(const int remaining_rows) +template +EIGEN_ALWAYS_INLINE void 
bstore(PacketBlock& acc, const DataMapper& res, Index row) { - if (remaining_rows == 0) { - return pset1(double(0.0)); // Not used - } else { - return Packet2d(mask21); + for (int M = 0; M < N; M++) { + res.template storePacket(row, M, acc.packet[M]); } } -template -EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha, const Packet& pMask) +#ifdef USE_PARTIAL_PACKETS +template +EIGEN_ALWAYS_INLINE void bload_partial(PacketBlock& acc, const DataMapper& res, Index row, Index elements) { - band(accZ, pMask); - - bscale(acc, accZ, pAlpha); + for (Index M = 0; M < N; M++) { + acc.packet[M] = res.template loadPacketPartial(row, M, elements); + } + if (Complex && full) { + for (Index M = 0; M < N; M++) { + acc.packet[M+N] = res.template loadPacketPartial(row + accCols, M, elements); + } + } } -template -EIGEN_ALWAYS_INLINE void pbroadcast4_old(const __UNPACK_TYPE__(Packet)* a, Packet& a0, Packet& a1, Packet& a2, Packet& a3) +template +EIGEN_ALWAYS_INLINE void bstore_partial(PacketBlock& acc, const DataMapper& res, Index row, Index elements) { - pbroadcast4(a, a0, a1, a2, a3); + for (Index M = 0; M < N; M++) { + res.template storePacketPartial(row, M, acc.packet[M], elements); + } +} +#endif + +#ifdef _ARCH_PWR10 +#define USE_P10_AND_PVIPR2_0 (EIGEN_COMP_LLVM || (__GNUC__ >= 11)) +#else +#define USE_P10_AND_PVIPR2_0 0 +#endif + +#if !USE_P10_AND_PVIPR2_0 +const static Packet4i mask4[4] = { { 0, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, -1, 0, 0 }, { -1, -1, -1, 0 } }; +#endif + +template +EIGEN_ALWAYS_INLINE Packet bmask(const Index remaining_rows) +{ +#if USE_P10_AND_PVIPR2_0 +#ifdef _BIG_ENDIAN + return Packet(vec_reve(vec_genwm((1 << remaining_rows) - 1))); +#else + return Packet(vec_genwm((1 << remaining_rows) - 1)); +#endif +#else + return Packet(mask4[remaining_rows]); +#endif } template<> -EIGEN_ALWAYS_INLINE void pbroadcast4_old(const double* a, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) +EIGEN_ALWAYS_INLINE Packet2d bmask(const Index remaining_rows) { - a1 = pload(a); - a3 = pload(a + 2); +#if USE_P10_AND_PVIPR2_0 + Packet2d mask2 = Packet2d(vec_gendm(remaining_rows)); +#ifdef _BIG_ENDIAN + return preverse(mask2); +#else + return mask2; +#endif +#else + Packet2l ret = { -remaining_rows, 0 }; + return Packet2d(ret); +#endif +} + +template +EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha) +{ + for (int M = 0; M < N; M++) { + acc.packet[M] = pmadd(pAlpha, accZ.packet[M], acc.packet[M]); + } +} + +// Scale the PacketBlock vectors by alpha. 
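
// --- Illustrative sketch (not from the patch) ---
// bmask (above) builds a packet whose first remaining_rows lanes are all-ones
// and whose tail lanes are zero; band() ANDs an accumulator with that mask so
// lanes past the edge of the matrix contribute nothing when the masked bscale
// below adds alpha * accZ into the result. Lane-wise sketch for a 4-wide
// packet (names hypothetical):
inline void masked_scale_sketch(float acc[4], const float accZ[4],
                                float alpha, int remaining_rows) {
  for (int lane = 0; lane < 4; lane++) {
    // AND with an all-ones lane keeps the value; AND with zero drops it.
    float masked = (lane < remaining_rows) ? accZ[lane] : 0.0f;
    acc[lane] += alpha * masked;
  }
}
// --- End sketch ---
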
+template +EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha, const Packet& pMask) +{ + if (mask) { + band(accZ, pMask); + } else { + EIGEN_UNUSED_VARIABLE(pMask); + } + + bscale(acc, accZ, pAlpha); +} + +template +EIGEN_ALWAYS_INLINE void pbroadcastN(const __UNPACK_TYPE__(Packet) *ap0, + const __UNPACK_TYPE__(Packet) *ap1, const __UNPACK_TYPE__(Packet) *ap2, + Packet& a0, Packet& a1, Packet& a2, Packet& a3) +{ + a0 = pset1(ap0[0]); + if (N == 4) { + a1 = pset1(ap0[1]); + a2 = pset1(ap0[2]); + a3 = pset1(ap0[3]); + EIGEN_UNUSED_VARIABLE(ap1); + EIGEN_UNUSED_VARIABLE(ap2); + } else { + if (N > 1) { + a1 = pset1(ap1[0]); + } else { + EIGEN_UNUSED_VARIABLE(a1); + EIGEN_UNUSED_VARIABLE(ap1); + } + if (N > 2) { + a2 = pset1(ap2[0]); + } else { + EIGEN_UNUSED_VARIABLE(a2); + EIGEN_UNUSED_VARIABLE(ap2); + } + } +} + +template<> EIGEN_ALWAYS_INLINE void +pbroadcastN(const float *ap0, const float *, const float *, + Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) +{ + pbroadcast4(ap0, a0, a1, a2, a3); +} + +template<> EIGEN_ALWAYS_INLINE void +pbroadcastN(const float *ap0, const float *ap1, const float *ap2, + Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) +{ + pbroadcastN(ap0, ap1, ap2, a0, a1, a2, a3); +} + +template<> +EIGEN_ALWAYS_INLINE void pbroadcastN(const double* ap0, const double *, + const double *, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) +{ + a1 = pload(ap0); + a3 = pload(ap0 + 2); a0 = vec_splat(a1, 0); a1 = vec_splat(a1, 1); a2 = vec_splat(a3, 0); a3 = vec_splat(a3, 1); } +// Grab two decoupled real/imaginary PacketBlocks and return two coupled (real/imaginary pairs) PacketBlocks. +template +EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& acc1, PacketBlock& acc2) +{ + for (int M = 0; M < N; M++) { + acc1.packet[M].v = vec_mergeh(taccReal.packet[M], taccImag.packet[M]); + } + + if (full) { + for (int M = 0; M < N; M++) { + acc2.packet[M].v = vec_mergel(taccReal.packet[M], taccImag.packet[M]); + } + } +} + +template +EIGEN_ALWAYS_INLINE void bcouple(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& tRes, PacketBlock& acc1, PacketBlock& acc2) +{ + bcouple_common(taccReal, taccImag, acc1, acc2); + + for (int M = 0; M < N; M++) { + acc1.packet[M] = padd(tRes.packet[M], acc1.packet[M]); + } + + if (full) { + for (int M = 0; M < N; M++) { + acc2.packet[M] = padd(tRes.packet[M+N], acc2.packet[M]); + } + } +} + // PEEL loop factor.
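
// --- Illustrative sketch (not from the patch) ---
// The PEEL factor defined below replicates the depth-loop body several times
// between prefetches so that many independent multiply-adds are in flight at
// once; whatever depth is left over runs in a plain trailing loop. The shape
// of the idiom, scalarized (names hypothetical):
inline float dot_peeled_sketch(const float* a, const float* b, int depth) {
  const int kPeel = 7;               // mirrors #define PEEL 7
  float acc = 0.0f;
  int k = 0;
  for (; k + kPeel <= depth; k += kPeel)
    for (int l = 0; l < kPeel; l++)  // constant trip count: fully unrollable
      acc += a[k + l] * b[k + l];
  for (; k < depth; k++)             // remainder
    acc += a[k] * b[k];
  return acc;
}
// --- End sketch ---
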
#define PEEL 7 - -template -EIGEN_ALWAYS_INLINE void MICRO_EXTRA_COL( - const Scalar* &lhs_ptr, - const Scalar* &rhs_ptr, - PacketBlock &accZero, - Index remaining_rows, - Index remaining_cols) -{ - Packet rhsV[1]; - rhsV[0] = pset1(rhs_ptr[0]); - pger<1,Scalar, Packet, false>(&accZero, lhs_ptr, rhsV); - lhs_ptr += remaining_rows; - rhs_ptr += remaining_cols; -} - -template -EIGEN_STRONG_INLINE void gemm_extra_col( - const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index row, - Index col, - Index remaining_rows, - Index remaining_cols, - const Packet& pAlpha) -{ - const Scalar* rhs_ptr = rhs_base; - const Scalar* lhs_ptr = lhs_base + row*strideA + remaining_rows*offsetA; - PacketBlock accZero; - - bsetzero(accZero); - - Index remaining_depth = (depth & -accRows); - Index k = 0; - for(; k + PEEL <= remaining_depth; k+= PEEL) - { - EIGEN_POWER_PREFETCH(rhs_ptr); - EIGEN_POWER_PREFETCH(lhs_ptr); - for (int l = 0; l < PEEL; l++) { - MICRO_EXTRA_COL(lhs_ptr, rhs_ptr, accZero, remaining_rows, remaining_cols); - } - } - for(; k < remaining_depth; k++) - { - MICRO_EXTRA_COL(lhs_ptr, rhs_ptr, accZero, remaining_rows, remaining_cols); - } - for(; k < depth; k++) - { - Packet rhsV[1]; - rhsV[0] = pset1(rhs_ptr[0]); - pger<1, Scalar, Packet, Index, false>(&accZero, lhs_ptr, rhsV, remaining_rows); - lhs_ptr += remaining_rows; - rhs_ptr += remaining_cols; - } - - accZero.packet[0] = vec_mul(pAlpha, accZero.packet[0]); - for(Index i = 0; i < remaining_rows; i++) { - res(row + i, col) += accZero.packet[0][i]; - } -} - -template -EIGEN_ALWAYS_INLINE void MICRO_EXTRA_ROW( - const Scalar* &lhs_ptr, - const Scalar* &rhs_ptr, - PacketBlock &accZero, - Index remaining_rows) -{ - Packet rhsV[4]; - pbroadcast4(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]); - pger<4, Scalar, Packet, false>(&accZero, lhs_ptr, rhsV); - lhs_ptr += remaining_rows; - rhs_ptr += accRows; -} - -template -EIGEN_STRONG_INLINE void gemm_extra_row( - const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index row, - Index col, - Index rows, - Index cols, - Index remaining_rows, - const Packet& pAlpha, - const Packet& pMask) -{ - const Scalar* rhs_ptr = rhs_base; - const Scalar* lhs_ptr = lhs_base + row*strideA + remaining_rows*offsetA; - PacketBlock accZero, acc; - - bsetzero(accZero); - - Index remaining_depth = (col + accRows < cols) ? 
depth : (depth & -accRows);
-  Index k = 0;
-  for(; k + PEEL <= remaining_depth; k+= PEEL)
-  {
-    EIGEN_POWER_PREFETCH(rhs_ptr);
-    EIGEN_POWER_PREFETCH(lhs_ptr);
-    for (int l = 0; l < PEEL; l++) {
-      MICRO_EXTRA_ROW(lhs_ptr, rhs_ptr, accZero, remaining_rows);
-    }
-  }
-  for(; k < remaining_depth; k++)
-  {
-    MICRO_EXTRA_ROW(lhs_ptr, rhs_ptr, accZero, remaining_rows);
-  }
-
-  if ((remaining_depth == depth) && (rows >= accCols))
-  {
-    for(Index j = 0; j < 4; j++) {
-      acc.packet[j] = res.template loadPacket(row, col + j);
-    }
-    bscale(acc, accZero, pAlpha, pMask);
-    res.template storePacketBlock(row, col, acc);
-  } else {
-    for(; k < depth; k++)
-    {
-      Packet rhsV[4];
-      pbroadcast4(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
-      pger<4, Scalar, Packet, Index, false>(&accZero, lhs_ptr, rhsV, remaining_rows);
-      lhs_ptr += remaining_rows;
-      rhs_ptr += accRows;
-    }
-
-    for(Index j = 0; j < 4; j++) {
-      accZero.packet[j] = vec_mul(pAlpha, accZero.packet[j]);
-    }
-    for(Index j = 0; j < 4; j++) {
-      for(Index i = 0; i < remaining_rows; i++) {
-        res(row + i, col + j) += accZero.packet[j][i];
-      }
-    }
-  }
-}
+#define PEEL_ROW 7

#define MICRO_UNROLL(func) \
  func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)

-#define MICRO_UNROLL_WORK(func, func2, peel) \
-  MICRO_UNROLL(func2); \
-  func(0,peel) func(1,peel) func(2,peel) func(3,peel) \
-  func(4,peel) func(5,peel) func(6,peel) func(7,peel)
+#define MICRO_NORMAL_ROWS \
+  accRows == quad_traits::rows || accRows == 1

-#define MICRO_LOAD_ONE(iter) \
-  if (unroll_factor > iter) { \
-    lhsV##iter = ploadLhs(lhs_ptr##iter); \
-    lhs_ptr##iter += accCols; \
+#define MICRO_NEW_ROWS ((MICRO_NORMAL_ROWS) ? accRows : 1)
+
+#define MICRO_RHS(ptr, N) rhs_##ptr##N
+
+#define MICRO_ZERO_PEEL(peel) \
+  if ((PEEL_ROW > peel) && (peel != 0)) { \
+    bsetzero(accZero##peel); \
  } else { \
-    EIGEN_UNUSED_VARIABLE(lhsV##iter); \
+    EIGEN_UNUSED_VARIABLE(accZero##peel); \
  }

+#define MICRO_ADD(ptr, N) \
+  if (MICRO_NORMAL_ROWS) { \
+    MICRO_RHS(ptr,0) += (accRows * N); \
+  } else { \
+    MICRO_RHS(ptr,0) += N; \
+    MICRO_RHS(ptr,1) += N; \
+    if (accRows == 3) { \
+      MICRO_RHS(ptr,2) += N; \
+    } \
+  }
+
+#define MICRO_ADD_ROWS(N) MICRO_ADD(ptr, N)
+
+#define MICRO_BROADCAST1(peel, ptr, rhsV, real) \
+  if (MICRO_NORMAL_ROWS) { \
+    pbroadcastN(MICRO_RHS(ptr,0) + (accRows * peel), MICRO_RHS(ptr,0), MICRO_RHS(ptr,0), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
+  } else { \
+    pbroadcastN(MICRO_RHS(ptr,0) + peel, MICRO_RHS(ptr,1) + peel, MICRO_RHS(ptr,2) + peel, rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
+  }
+
+#define MICRO_BROADCAST(peel) MICRO_BROADCAST1(peel, ptr, rhsV, true)
+
+#define MICRO_BROADCAST_EXTRA1(ptr, rhsV, real) \
+  pbroadcastN(MICRO_RHS(ptr,0), MICRO_RHS(ptr,1), MICRO_RHS(ptr,2), rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
+
+#define MICRO_BROADCAST_EXTRA \
+  Packet rhsV[4]; \
+  MICRO_BROADCAST_EXTRA1(ptr, rhsV, true) \
+  MICRO_ADD_ROWS(1)
+
+#define MICRO_SRC2(ptr, N, M) \
+  if (MICRO_NORMAL_ROWS) { \
+    EIGEN_UNUSED_VARIABLE(strideB); \
+    EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr,1)); \
+    EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr,2)); \
+  } else { \
+    MICRO_RHS(ptr,1) = rhs_base + N + M; \
+    if (accRows == 3) { \
+      MICRO_RHS(ptr,2) = rhs_base + N*2 + M; \
+    } else { \
+      EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr,2)); \
+    } \
+  }
+
+#define MICRO_SRC2_PTR MICRO_SRC2(ptr, strideB, 0)
+
+#define MICRO_ZERO_PEEL_ROW MICRO_UNROLL(MICRO_ZERO_PEEL)
+
+#define MICRO_WORK_PEEL(peel) \
+  if (PEEL_ROW > peel) { \
+    MICRO_BROADCAST(peel) \
+    pger(&accZero##peel, lhs_ptr + (remaining_rows * peel), rhsV##peel); \
+  } else { \
+    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
+  }
+
+#define MICRO_WORK_PEEL_ROW \
+  Packet rhsV0[4], rhsV1[4], rhsV2[4], rhsV3[4], rhsV4[4], rhsV5[4], rhsV6[4], rhsV7[4]; \
+  MICRO_UNROLL(MICRO_WORK_PEEL) \
+  lhs_ptr += (remaining_rows * PEEL_ROW); \
+  MICRO_ADD_ROWS(PEEL_ROW)
+
+#define MICRO_ADD_PEEL(peel, sum) \
+  if (PEEL_ROW > peel) { \
+    for (Index i = 0; i < accRows; i++) { \
+      accZero##sum.packet[i] += accZero##peel.packet[i]; \
+    } \
+  }
+
+#define MICRO_ADD_PEEL_ROW \
+  MICRO_ADD_PEEL(4, 0) MICRO_ADD_PEEL(5, 1) MICRO_ADD_PEEL(6, 2) MICRO_ADD_PEEL(7, 3) \
+  MICRO_ADD_PEEL(2, 0) MICRO_ADD_PEEL(3, 1) MICRO_ADD_PEEL(1, 0)
+
+#define MICRO_PREFETCHN1(ptr, N) \
+  EIGEN_POWER_PREFETCH(MICRO_RHS(ptr,0)); \
+  if (N == 2 || N == 3) { \
+    EIGEN_POWER_PREFETCH(MICRO_RHS(ptr,1)); \
+    if (N == 3) { \
+      EIGEN_POWER_PREFETCH(MICRO_RHS(ptr,2)); \
+    } \
+  }
+
+#define MICRO_PREFETCHN(N) MICRO_PREFETCHN1(ptr, N)
+
+#define MICRO_COMPLEX_PREFETCHN(N) \
+  MICRO_PREFETCHN1(ptr_real, N); \
+  if(!RhsIsReal) { \
+    MICRO_PREFETCHN1(ptr_imag, N); \
+  }
+
+template
+EIGEN_ALWAYS_INLINE void MICRO_EXTRA_ROW(
+  const Scalar* &lhs_ptr,
+  const Scalar* &rhs_ptr0,
+  const Scalar* &rhs_ptr1,
+  const Scalar* &rhs_ptr2,
+  PacketBlock &accZero)
+{
+  MICRO_BROADCAST_EXTRA
+  pger(&accZero, lhs_ptr, rhsV);
+  lhs_ptr += remaining_rows;
+}
+
+template
+EIGEN_ALWAYS_INLINE void gemm_unrolled_row_iteration(
+  const DataMapper& res,
+  const Scalar* lhs_base,
+  const Scalar* rhs_base,
+  Index depth,
+  Index strideA,
+  Index offsetA,
+  Index strideB,
+  Index row,
+  Index rows,
+  const Packet& pAlpha,
+  const Packet& pMask)
+{
+  const Scalar* rhs_ptr0 = rhs_base, * rhs_ptr1 = NULL, * rhs_ptr2 = NULL;
+  const Scalar* lhs_ptr = lhs_base + row*strideA + remaining_rows*offsetA;
+  PacketBlock accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7, acc;
+
+  MICRO_SRC2_PTR
+  bsetzero(accZero0);
+
+  Index remaining_depth = depth & -quad_traits::rows;
+  Index k = 0;
+  if (remaining_depth >= PEEL_ROW) {
+    MICRO_ZERO_PEEL_ROW
+    do
+    {
+      MICRO_PREFETCHN(accRows)
+      EIGEN_POWER_PREFETCH(lhs_ptr);
+      MICRO_WORK_PEEL_ROW
+    } while ((k += PEEL_ROW) + PEEL_ROW <= remaining_depth);
+    MICRO_ADD_PEEL_ROW
+  }
+  for(; k < depth; k++)
+  {
+    MICRO_EXTRA_ROW(lhs_ptr, rhs_ptr0, rhs_ptr1, rhs_ptr2, accZero0);
+  }
+
+#ifdef USE_PARTIAL_PACKETS
+  EIGEN_UNUSED_VARIABLE(rows);
+  EIGEN_UNUSED_VARIABLE(pMask);
+  bload_partial(acc, res, row, remaining_rows);
+  bscale(acc, accZero0, pAlpha);
+  bstore_partial(acc, res, row, remaining_rows);
+#else
+  bload(acc, res, row, 0);
+  if ((accRows == 1) || (rows >= accCols))
+  {
+    bscale(acc, accZero0, pAlpha, pMask);
+    bstore(acc, res, row);
+  } else {
+    bscale(acc, accZero0, pAlpha, pMask);
+    for(Index j = 0; j < accRows; j++) {
+      for(Index i = 0; i < remaining_rows; i++) {
+        res(row + i, j) = acc.packet[j][i];
+      }
+    }
+  }
+#endif
+}
+
+#define MICRO_EXTRA(MICRO_EXTRA_UNROLL, value, is_col) \
+  switch(value) { \
+    default: \
+      MICRO_EXTRA_UNROLL(1) \
+      break; \
+    case 2: \
+      if (is_col || (sizeof(Scalar) == sizeof(float))) { \
+        MICRO_EXTRA_UNROLL(2) \
+      } \
+      break; \
+    case 3: \
+      if (is_col || (sizeof(Scalar) == sizeof(float))) { \
+        MICRO_EXTRA_UNROLL(3) \
+      } \
+      break; \
+  }
+
+#define MICRO_EXTRA_ROWS(N) \
+  gemm_unrolled_row_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, rows, pAlpha, pMask);
+
+template
+EIGEN_ALWAYS_INLINE void gemm_extra_row(
+  const DataMapper& res,
+  const Scalar* lhs_base,
+  const Scalar* rhs_base,
+  Index depth,
+  Index strideA,
+  Index offsetA,
+  Index strideB,
+  Index row,
+  Index rows,
+  Index remaining_rows,
+  const Packet& pAlpha,
+  const Packet& pMask)
+{
+  MICRO_EXTRA(MICRO_EXTRA_ROWS, remaining_rows, false)
+}
+
+#define MICRO_UNROLL_WORK(func, func2, peel) \
+  MICRO_UNROLL(func2); \
+  func(0,peel) func(1,peel) func(2,peel) func(3,peel) \
+  func(4,peel) func(5,peel) func(6,peel) func(7,peel)
+
#define MICRO_WORK_ONE(iter, peel) \
  if (unroll_factor > iter) { \
-    pger_common(&accZero##iter, lhsV##iter, rhsV##peel); \
+    pger_common(&accZero##iter, lhsV##iter, rhsV##peel); \
  }

#define MICRO_TYPE_PEEL4(func, func2, peel) \
  if (PEEL > peel) { \
    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
-    pbroadcast4(rhs_ptr + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
-    MICRO_UNROLL_WORK(func, func2, peel) \
-  } else { \
-    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
-  }
-
-#define MICRO_TYPE_PEEL1(func, func2, peel) \
-  if (PEEL > peel) { \
-    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
-    rhsV##peel[0] = pset1(rhs_ptr[remaining_cols * peel]); \
+    MICRO_BROADCAST(peel) \
    MICRO_UNROLL_WORK(func, func2, peel) \
  } else { \
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
  }

#define MICRO_UNROLL_TYPE_PEEL(M, func, func1, func2) \
-  Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M], rhsV8[M], rhsV9[M]; \
-  func(func1,func2,0); func(func1,func2,1); \
-  func(func1,func2,2); func(func1,func2,3); \
-  func(func1,func2,4); func(func1,func2,5); \
-  func(func1,func2,6); func(func1,func2,7); \
-  func(func1,func2,8); func(func1,func2,9);
+  Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M]; \
+  func(func1,func2,0) func(func1,func2,1) \
+  func(func1,func2,2) func(func1,func2,3) \
+  func(func1,func2,4) func(func1,func2,5) \
+  func(func1,func2,6) func(func1,func2,7)

#define MICRO_UNROLL_TYPE_ONE(M, func, func1, func2) \
  Packet rhsV0[M]; \
-  func(func1,func2,0);
+  func(func1,func2,0)

-#define MICRO_ONE_PEEL4 \
-  MICRO_UNROLL_TYPE_PEEL(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
-  rhs_ptr += (accRows * PEEL);
+#define MICRO_UNROLL_TYPE(MICRO_TYPE, size) \
+  MICRO_TYPE(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE) \
+  MICRO_ADD_ROWS(size)

-#define MICRO_ONE4 \
-  MICRO_UNROLL_TYPE_ONE(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
-  rhs_ptr += accRows;
+#define MICRO_ONE_PEEL4 MICRO_UNROLL_TYPE(MICRO_UNROLL_TYPE_PEEL, PEEL)

-#define MICRO_ONE_PEEL1 \
-  MICRO_UNROLL_TYPE_PEEL(1, MICRO_TYPE_PEEL1, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
-  rhs_ptr += (remaining_cols * PEEL);
-
-#define MICRO_ONE1 \
-  MICRO_UNROLL_TYPE_ONE(1, MICRO_TYPE_PEEL1, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
-  rhs_ptr += remaining_cols;
+#define MICRO_ONE4 MICRO_UNROLL_TYPE(MICRO_UNROLL_TYPE_ONE, 1)

#define MICRO_DST_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
-    bsetzero(accZero##iter); \
+    bsetzero(accZero##iter); \
  } else { \
    EIGEN_UNUSED_VARIABLE(accZero##iter); \
  }

#define MICRO_DST_PTR MICRO_UNROLL(MICRO_DST_PTR_ONE)

-#define MICRO_SRC_PTR_ONE(iter) \
-  if (unroll_factor > iter) { \
-    lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols + accCols*offsetA; \
-  } else { \
-    EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
-  }
-
#define MICRO_SRC_PTR MICRO_UNROLL(MICRO_SRC_PTR_ONE)

-#define MICRO_PREFETCH_ONE(iter) \
-  if (unroll_factor > iter) { \
-    EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
-  }
-
#define MICRO_PREFETCH MICRO_UNROLL(MICRO_PREFETCH_ONE)

+#ifdef USE_PARTIAL_PACKETS
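+// NOTE: USE_PARTIAL_PACKETS appears to select between two tail-handling strategies for
+// result tiles narrower than a full accCols block. When it is defined, the store macros
+// below use bload_partial/bstore_partial with an explicit element count (accCols2),
+// presumably mapping onto load/store-with-length style instructions; when it is not
+// defined, the kernel performs full-width packet accesses and guards the out-of-range
+// lanes with the pMask bitmask, roughly:
+//   bload(acc, res, row, 0);               // load the full C tile
+//   bscale(acc, accZero0, pAlpha, pMask);  // fold alpha*accZero into acc; pMask guards tail lanes
+//   bstore(acc, res, row);                 // store the full tile back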
#define MICRO_STORE_ONE(iter) \
  if (unroll_factor > iter) { \
-    acc.packet[0] = res.template loadPacket(row + iter*accCols, col + 0); \
-    acc.packet[1] = res.template loadPacket(row + iter*accCols, col + 1); \
-    acc.packet[2] = res.template loadPacket(row + iter*accCols, col + 2); \
-    acc.packet[3] = res.template loadPacket(row + iter*accCols, col + 3); \
-    bscale(acc, accZero##iter, pAlpha); \
-    res.template storePacketBlock(row + iter*accCols, col, acc); \
+    if (MICRO_NORMAL_PARTIAL(iter)) { \
+      bload(acc, res, row + iter*accCols, 0); \
+      bscale(acc, accZero##iter, pAlpha); \
+      bstore(acc, res, row + iter*accCols); \
+    } else { \
+      bload_partial(acc, res, row + iter*accCols, accCols2); \
+      bscale(acc, accZero##iter, pAlpha); \
+      bstore_partial(acc, res, row + iter*accCols, accCols2); \
+    } \
  }
+#else
+#define MICRO_STORE_ONE(iter) \
+  if (unroll_factor > iter) { \
+    bload(acc, res, row + iter*accCols, 0); \
+    bscale(acc, accZero##iter, pAlpha, pMask); \
+    bstore(acc, res, row + iter*accCols); \
+  }
+#endif

#define MICRO_STORE MICRO_UNROLL(MICRO_STORE_ONE)

-#define MICRO_COL_STORE_ONE(iter) \
-  if (unroll_factor > iter) { \
-    acc.packet[0] = res.template loadPacket(row + iter*accCols, col + 0); \
-    bscale(acc, accZero##iter, pAlpha); \
-    res.template storePacketBlock(row + iter*accCols, col, acc); \
-  }
-
-#define MICRO_COL_STORE MICRO_UNROLL(MICRO_COL_STORE_ONE)
-
-template
-EIGEN_STRONG_INLINE void gemm_unrolled_iteration(
+#ifdef USE_PARTIAL_PACKETS
+template
+#else
+template
+#endif
+EIGEN_ALWAYS_INLINE void gemm_unrolled_iteration(
  const DataMapper& res,
  const Scalar* lhs_base,
  const Scalar* rhs_base,
  Index depth,
  Index strideA,
  Index offsetA,
+  Index strideB,
  Index& row,
-  Index col,
-  const Packet& pAlpha)
+  const Packet& pAlpha,
+#ifdef USE_PARTIAL_PACKETS
+  Index accCols2
+#else
+  const Packet& pMask
+#endif
+  )
{
-  const Scalar* rhs_ptr = rhs_base;
-  const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;
-  PacketBlock accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;
-  PacketBlock acc;
+  const Scalar* rhs_ptr0 = rhs_base, * rhs_ptr1 = NULL, * rhs_ptr2 = NULL;
+  const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;
+  PacketBlock accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;
+  PacketBlock acc;
+
+  MICRO_SRC2_PTR
  MICRO_SRC_PTR
  MICRO_DST_PTR

  Index k = 0;
  for(; k + PEEL <= depth; k+= PEEL)
  {
-    EIGEN_POWER_PREFETCH(rhs_ptr);
+    MICRO_PREFETCHN(accRows)
    MICRO_PREFETCH
    MICRO_ONE_PEEL4
  }
@@ -1590,197 +1684,139 @@ EIGEN_STRONG_INLINE void gemm_unrolled_iteration(
  }
  MICRO_STORE

-  row += unroll_factor*accCols;
+  MICRO_UPDATE
}

-template
-EIGEN_STRONG_INLINE void gemm_unrolled_col_iteration(
+#ifdef USE_PARTIAL_PACKETS
+#define MICRO_UNROLL_ITER2(N, M) \
+  gemm_unrolled_iteration(res3, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, pAlpha, M ? remaining_rows : accCols); \
+  if (M) return;
+#else
+#define MICRO_UNROLL_ITER2(N, M) \
+  gemm_unrolled_iteration(res3, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, pAlpha, pMask); \
+  if (M) return;
+#endif
+
+template
+EIGEN_ALWAYS_INLINE void gemm_cols(
  const DataMapper& res,
-  const Scalar* lhs_base,
-  const Scalar* rhs_base,
+  const Scalar* blockA,
+  const Scalar* blockB,
  Index depth,
  Index strideA,
  Index offsetA,
-  Index& row,
+  Index strideB,
+  Index offsetB,
  Index col,
-  Index remaining_cols,
-  const Packet& pAlpha)
-{
-  const Scalar* rhs_ptr = rhs_base;
-  const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, *lhs_ptr7 = NULL;
-  PacketBlock accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;
-  PacketBlock acc;
-
-  MICRO_SRC_PTR
-  MICRO_DST_PTR
-
-  Index k = 0;
-  for(; k + PEEL <= depth; k+= PEEL)
-  {
-    EIGEN_POWER_PREFETCH(rhs_ptr);
-    MICRO_PREFETCH
-    MICRO_ONE_PEEL1
-  }
-  for(; k < depth; k++)
-  {
-    MICRO_ONE1
-  }
-  MICRO_COL_STORE
-
-  row += unroll_factor*accCols;
-}
-
-template
-EIGEN_STRONG_INLINE void gemm_unrolled_col(
-  const DataMapper& res,
-  const Scalar* lhs_base,
-  const Scalar* rhs_base,
-  Index depth,
-  Index strideA,
-  Index offsetA,
-  Index& row,
  Index rows,
-  Index col,
-  Index remaining_cols,
-  const Packet& pAlpha)
+  Index remaining_rows,
+  const Packet& pAlpha,
+  const Packet& pMask)
{
-#define MAX_UNROLL 6
+  const DataMapper res3 = res.getSubMapper(0, col);
+
+  const Scalar* rhs_base = blockB + col*strideB + MICRO_NEW_ROWS*offsetB;
+  const Scalar* lhs_base = blockA + accCols*offsetA;
+  Index row = 0;
+
+#define MAX_UNROLL 7
  while(row + MAX_UNROLL*accCols <= rows) {
-    gemm_unrolled_col_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);
+    MICRO_UNROLL_ITER2(MAX_UNROLL, 0);
  }
  switch( (rows-row)/accCols ) {
#if MAX_UNROLL > 7
  case 7:
-    gemm_unrolled_col_iteration<7, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);
+    MICRO_UNROLL_ITER(MICRO_UNROLL_ITER2, 7)
    break;
#endif
#if MAX_UNROLL > 6
  case 6:
-    gemm_unrolled_col_iteration<6, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);
+    MICRO_UNROLL_ITER(MICRO_UNROLL_ITER2, 6)
    break;
#endif
#if MAX_UNROLL > 5
-  case 5:
-    gemm_unrolled_col_iteration<5, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);
+  case 5:
+    MICRO_UNROLL_ITER(MICRO_UNROLL_ITER2, 5)
    break;
#endif
#if MAX_UNROLL > 4
-  case 4:
-    gemm_unrolled_col_iteration<4, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);
+  case 4:
+    MICRO_UNROLL_ITER(MICRO_UNROLL_ITER2, 4)
    break;
#endif
#if MAX_UNROLL > 3
-  case 3:
-    gemm_unrolled_col_iteration<3, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);
-    break;
+  case 3:
+    MICRO_UNROLL_ITER(MICRO_UNROLL_ITER2, 3)
+    break;
#endif
#if MAX_UNROLL > 2
-  case 2:
-    gemm_unrolled_col_iteration<2, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);
-    break;
+  case 2:
+    MICRO_UNROLL_ITER(MICRO_UNROLL_ITER2, 2)
+    break;
#endif
#if MAX_UNROLL > 1
-  case 1:
-    gemm_unrolled_col_iteration<1, 
Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha); - break; + case 1: + MICRO_UNROLL_ITER(MICRO_UNROLL_ITER2, 1) + break; #endif - default: - break; + default: + break; } #undef MAX_UNROLL + + if(remaining_rows > 0) + { + gemm_extra_row(res3, blockA, rhs_base, depth, strideA, offsetA, strideB, row, rows, remaining_rows, pAlpha, pMask); + } +} + +#define MICRO_EXTRA_COLS(N) \ + gemm_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, remaining_rows, pAlpha, pMask); + +template +EIGEN_STRONG_INLINE void gemm_extra_cols( + const DataMapper& res, + const Scalar* blockA, + const Scalar* blockB, + Index depth, + Index strideA, + Index offsetA, + Index strideB, + Index offsetB, + Index col, + Index rows, + Index cols, + Index remaining_rows, + const Packet& pAlpha, + const Packet& pMask) +{ + MICRO_EXTRA(MICRO_EXTRA_COLS, cols-col, true) } /**************** * GEMM kernels * * **************/ -template +template EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const Scalar* blockA, const Scalar* blockB, Index rows, Index depth, Index cols, Scalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB) { const Index remaining_rows = rows % accCols; - const Index remaining_cols = cols % accRows; if( strideA == -1 ) strideA = depth; if( strideB == -1 ) strideB = depth; const Packet pAlpha = pset1(alpha); - const Packet pMask = bmask((const int)(remaining_rows)); + const Packet pMask = bmask(remaining_rows); Index col = 0; for(; col + accRows <= cols; col += accRows) { - const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB; - const Scalar* lhs_base = blockA; - Index row = 0; - -#define MAX_UNROLL 6 - while(row + MAX_UNROLL*accCols <= rows) { - gemm_unrolled_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - } - switch( (rows-row)/accCols ) { -#if MAX_UNROLL > 7 - case 7: - gemm_unrolled_iteration<7, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_UNROLL > 6 - case 6: - gemm_unrolled_iteration<6, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_UNROLL > 5 - case 5: - gemm_unrolled_iteration<5, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_UNROLL > 4 - case 4: - gemm_unrolled_iteration<4, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_UNROLL > 3 - case 3: - gemm_unrolled_iteration<3, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_UNROLL > 2 - case 2: - gemm_unrolled_iteration<2, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_UNROLL > 1 - case 1: - gemm_unrolled_iteration<1, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif - default: - break; - } -#undef MAX_UNROLL - - if(remaining_rows > 0) - { - gemm_extra_row(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask); - } - } - - if(remaining_cols > 0) - { - const 
Scalar* rhs_base = blockB + col*strideB + remaining_cols*offsetB; - const Scalar* lhs_base = blockA; - - for(; col < cols; col++) - { - Index row = 0; - - gemm_unrolled_col(res, lhs_base, rhs_base, depth, strideA, offsetA, row, rows, col, remaining_cols, pAlpha); - - if (remaining_rows > 0) - { - gemm_extra_col(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_rows, remaining_cols, pAlpha); - } - rhs_base++; + gemm_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, remaining_rows, pAlpha, pMask); + } + + if (col != cols) + { + gemm_extra_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask); } - } } #define accColsC (accCols / 2) @@ -1789,127 +1825,108 @@ EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const Scalar* blockA, const // PEEL_COMPLEX loop factor. #define PEEL_COMPLEX 3 +#define PEEL_COMPLEX_ROW 3 -template -EIGEN_ALWAYS_INLINE void MICRO_COMPLEX_EXTRA_COL( - const Scalar* &lhs_ptr_real, const Scalar* &lhs_ptr_imag, - const Scalar* &rhs_ptr_real, const Scalar* &rhs_ptr_imag, - PacketBlock &accReal, PacketBlock &accImag, - Index remaining_rows, - Index remaining_cols) -{ - Packet rhsV[1], rhsVi[1]; - rhsV[0] = pset1(rhs_ptr_real[0]); - if(!RhsIsReal) rhsVi[0] = pset1(rhs_ptr_imag[0]); - pgerc<1, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi); - lhs_ptr_real += remaining_rows; - if(!LhsIsReal) lhs_ptr_imag += remaining_rows; +#define MICRO_COMPLEX_UNROLL(func) \ + func(0) func(1) func(2) func(3) + +#define MICRO_COMPLEX_ZERO_PEEL(peel) \ + if ((PEEL_COMPLEX_ROW > peel) && (peel != 0)) { \ + bsetzero(accReal##peel); \ + bsetzero(accImag##peel); \ + } else { \ + EIGEN_UNUSED_VARIABLE(accReal##peel); \ + EIGEN_UNUSED_VARIABLE(accImag##peel); \ + } + +#define MICRO_COMPLEX_ADD_ROWS(N, used) \ + MICRO_ADD(ptr_real, N) \ + if (!RhsIsReal) { \ + MICRO_ADD(ptr_imag, N) \ + } else if (used) { \ + EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr_imag,0)); \ + EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr_imag,1)); \ + EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr_imag,2)); \ + } + +#define MICRO_COMPLEX_BROADCAST(peel) \ + MICRO_BROADCAST1(peel, ptr_real, rhsV, false) \ + if (!RhsIsReal) { \ + MICRO_BROADCAST1(peel, ptr_imag, rhsVi, false) \ + } else { \ + EIGEN_UNUSED_VARIABLE(rhsVi##peel); \ + } + +#define MICRO_COMPLEX_BROADCAST_EXTRA \ + Packet rhsV[4], rhsVi[4]; \ + MICRO_BROADCAST_EXTRA1(ptr_real, rhsV, false) \ + if(!RhsIsReal) { \ + MICRO_BROADCAST_EXTRA1(ptr_imag, rhsVi, false) \ + } else { \ + EIGEN_UNUSED_VARIABLE(rhsVi); \ + } \ + MICRO_COMPLEX_ADD_ROWS(1, true) + +#define MICRO_COMPLEX_SRC2_PTR \ + MICRO_SRC2(ptr_real, strideB*advanceCols, 0) \ + if (!RhsIsReal) { \ + MICRO_RHS(ptr_imag,0) = rhs_base + MICRO_NEW_ROWS*strideB; \ + MICRO_SRC2(ptr_imag, strideB*advanceCols, strideB) \ + } else { \ + EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr_imag,0)); \ + EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr_imag,1)); \ + EIGEN_UNUSED_VARIABLE(MICRO_RHS(ptr_imag,2)); \ + } + +#define MICRO_COMPLEX_ZERO_PEEL_ROW MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_ZERO_PEEL) + +#define MICRO_COMPLEX_WORK_PEEL(peel) \ + if (PEEL_COMPLEX_ROW > peel) { \ + MICRO_COMPLEX_BROADCAST(peel) \ + pgerc(&accReal##peel, &accImag##peel, lhs_ptr_real + (remaining_rows * peel), lhs_ptr_imag + (remaining_rows * peel), rhsV##peel, rhsVi##peel); \ + } else { \ + EIGEN_UNUSED_VARIABLE(rhsV##peel); \ + EIGEN_UNUSED_VARIABLE(rhsVi##peel); \ + } + +#define MICRO_COMPLEX_ADD_COLS(size) 
\ + lhs_ptr_real += (remaining_rows * size); \ + if(!LhsIsReal) lhs_ptr_imag += (remaining_rows * size); \ else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); - rhs_ptr_real += remaining_cols; - if(!RhsIsReal) rhs_ptr_imag += remaining_cols; - else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag); -} -template -EIGEN_STRONG_INLINE void gemm_complex_extra_col( - const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index strideB, - Index row, - Index col, - Index remaining_rows, - Index remaining_cols, - const Packet& pAlphaReal, - const Packet& pAlphaImag) -{ - const Scalar* rhs_ptr_real = rhs_base; - const Scalar* rhs_ptr_imag; - if(!RhsIsReal) rhs_ptr_imag = rhs_base + remaining_cols*strideB; - else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag); - const Scalar* lhs_ptr_real = lhs_base + advanceRows*row*strideA + remaining_rows*offsetA; - const Scalar* lhs_ptr_imag; - if(!LhsIsReal) lhs_ptr_imag = lhs_ptr_real + remaining_rows*strideA; - else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); - PacketBlock accReal, accImag; - PacketBlock taccReal, taccImag; - PacketBlock acc0, acc1; +#define MICRO_COMPLEX_WORK_PEEL_ROW \ + Packet rhsV0[4], rhsV1[4], rhsV2[4], rhsV3[4]; \ + Packet rhsVi0[4], rhsVi1[4], rhsVi2[4], rhsVi3[4]; \ + MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_WORK_PEEL) \ + MICRO_COMPLEX_ADD_COLS(PEEL_COMPLEX_ROW) \ + MICRO_COMPLEX_ADD_ROWS(PEEL_COMPLEX_ROW, false) - bsetzero(accReal); - bsetzero(accImag); - - Index remaining_depth = (depth & -accRows); - Index k = 0; - for(; k + PEEL_COMPLEX <= remaining_depth; k+= PEEL_COMPLEX) - { - EIGEN_POWER_PREFETCH(rhs_ptr_real); - if(!RhsIsReal) { - EIGEN_POWER_PREFETCH(rhs_ptr_imag); - } - EIGEN_POWER_PREFETCH(lhs_ptr_real); - if(!LhsIsReal) { - EIGEN_POWER_PREFETCH(lhs_ptr_imag); - } - for (int l = 0; l < PEEL_COMPLEX; l++) { - MICRO_COMPLEX_EXTRA_COL(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows, remaining_cols); - } - } - for(; k < remaining_depth; k++) - { - MICRO_COMPLEX_EXTRA_COL(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows, remaining_cols); +#define MICRO_COMPLEX_ADD_PEEL(peel, sum) \ + if (PEEL_COMPLEX_ROW > peel) { \ + for (Index i = 0; i < accRows; i++) { \ + accReal##sum.packet[i] += accReal##peel.packet[i]; \ + accImag##sum.packet[i] += accImag##peel.packet[i]; \ + } \ } - for(; k < depth; k++) - { - Packet rhsV[1], rhsVi[1]; - rhsV[0] = pset1(rhs_ptr_real[0]); - if(!RhsIsReal) rhsVi[0] = pset1(rhs_ptr_imag[0]); - pgerc<1, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi, remaining_rows); - lhs_ptr_real += remaining_rows; - if(!LhsIsReal) lhs_ptr_imag += remaining_rows; - rhs_ptr_real += remaining_cols; - if(!RhsIsReal) rhs_ptr_imag += remaining_cols; - } +#define MICRO_COMPLEX_ADD_PEEL_ROW \ + MICRO_COMPLEX_ADD_PEEL(2, 0) MICRO_COMPLEX_ADD_PEEL(3, 1) \ + MICRO_COMPLEX_ADD_PEEL(1, 0) - bscalec(accReal, accImag, pAlphaReal, pAlphaImag, taccReal, taccImag); - bcouple_common(taccReal, taccImag, acc0, acc1); - - if ((sizeof(Scalar) == sizeof(float)) && (remaining_rows == 1)) - { - res(row + 0, col + 0) += pfirst(acc0.packet[0]); - } else { - acc0.packet[0] += res.template loadPacket(row + 0, col + 0); - res.template storePacketBlock(row + 0, col + 0, acc0); - if(remaining_rows > accColsC) { - res(row + accColsC, col + 0) += pfirst(acc1.packet[0]); - } - } -} - -template +template EIGEN_ALWAYS_INLINE void MICRO_COMPLEX_EXTRA_ROW( const Scalar* 
&lhs_ptr_real, const Scalar* &lhs_ptr_imag, - const Scalar* &rhs_ptr_real, const Scalar* &rhs_ptr_imag, - PacketBlock &accReal, PacketBlock &accImag, - Index remaining_rows) + const Scalar* &rhs_ptr_real0, const Scalar* &rhs_ptr_real1, const Scalar* &rhs_ptr_real2, + const Scalar* &rhs_ptr_imag0, const Scalar* &rhs_ptr_imag1, const Scalar* &rhs_ptr_imag2, + PacketBlock &accReal, PacketBlock &accImag) { - Packet rhsV[4], rhsVi[4]; - pbroadcast4_old(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]); - if(!RhsIsReal) pbroadcast4_old(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]); - pgerc<4, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi); - lhs_ptr_real += remaining_rows; - if(!LhsIsReal) lhs_ptr_imag += remaining_rows; - else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); - rhs_ptr_real += accRows; - if(!RhsIsReal) rhs_ptr_imag += accRows; - else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag); + MICRO_COMPLEX_BROADCAST_EXTRA + pgerc(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi); + MICRO_COMPLEX_ADD_COLS(1) } -template -EIGEN_STRONG_INLINE void gemm_complex_extra_row( +template +EIGEN_ALWAYS_INLINE void gemm_unrolled_complex_row_iteration( const DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base, @@ -1918,150 +1935,113 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_row( Index offsetA, Index strideB, Index row, - Index col, Index rows, - Index cols, - Index remaining_rows, const Packet& pAlphaReal, const Packet& pAlphaImag, const Packet& pMask) { - const Scalar* rhs_ptr_real = rhs_base; - const Scalar* rhs_ptr_imag; - if(!RhsIsReal) rhs_ptr_imag = rhs_base + accRows*strideB; - else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag); + const Scalar* rhs_ptr_real0 = rhs_base, * rhs_ptr_real1 = NULL, * rhs_ptr_real2 = NULL; + const Scalar* rhs_ptr_imag0 = NULL, * rhs_ptr_imag1 = NULL, * rhs_ptr_imag2 = NULL; const Scalar* lhs_ptr_real = lhs_base + advanceRows*row*strideA + remaining_rows*offsetA; - const Scalar* lhs_ptr_imag; + const Scalar* lhs_ptr_imag = NULL; if(!LhsIsReal) lhs_ptr_imag = lhs_ptr_real + remaining_rows*strideA; else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); - PacketBlock accReal, accImag; - PacketBlock taccReal, taccImag; - PacketBlock acc0, acc1; - PacketBlock tRes; + PacketBlock accReal0, accImag0, accReal1, accImag1, accReal2, accImag2, accReal3, accImag3; + PacketBlock taccReal, taccImag; + PacketBlock acc0, acc1; + PacketBlock tRes; - bsetzero(accReal); - bsetzero(accImag); + MICRO_COMPLEX_SRC2_PTR - Index remaining_depth = (col + accRows < cols) ? 
depth : (depth & -accRows); + bsetzero(accReal0); + bsetzero(accImag0); + + Index remaining_depth = depth & -quad_traits::rows; Index k = 0; - for(; k + PEEL_COMPLEX <= remaining_depth; k+= PEEL_COMPLEX) - { - EIGEN_POWER_PREFETCH(rhs_ptr_real); - if(!RhsIsReal) { - EIGEN_POWER_PREFETCH(rhs_ptr_imag); - } - EIGEN_POWER_PREFETCH(lhs_ptr_real); - if(!LhsIsReal) { - EIGEN_POWER_PREFETCH(lhs_ptr_imag); - } - for (int l = 0; l < PEEL_COMPLEX; l++) { - MICRO_COMPLEX_EXTRA_ROW(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows); - } - } - for(; k < remaining_depth; k++) - { - MICRO_COMPLEX_EXTRA_ROW(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows); - } - - if ((remaining_depth == depth) && (rows >= accCols)) - { - bload(tRes, res, row, col); - bscalec(accReal, accImag, pAlphaReal, pAlphaImag, taccReal, taccImag, pMask); - bcouple(taccReal, taccImag, tRes, acc0, acc1); - res.template storePacketBlock(row + 0, col, acc0); - res.template storePacketBlock(row + accColsC, col, acc1); - } else { - for(; k < depth; k++) + if (remaining_depth >= PEEL_COMPLEX_ROW) { + MICRO_COMPLEX_ZERO_PEEL_ROW + do { - Packet rhsV[4], rhsVi[4]; - pbroadcast4_old(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]); - if(!RhsIsReal) pbroadcast4_old(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]); - pgerc<4, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi, remaining_rows); - lhs_ptr_real += remaining_rows; - if(!LhsIsReal) lhs_ptr_imag += remaining_rows; - rhs_ptr_real += accRows; - if(!RhsIsReal) rhs_ptr_imag += accRows; - } + MICRO_COMPLEX_PREFETCHN(accRows) + EIGEN_POWER_PREFETCH(lhs_ptr_real); + if(!LhsIsReal) { + EIGEN_POWER_PREFETCH(lhs_ptr_imag); + } + MICRO_COMPLEX_WORK_PEEL_ROW + } while ((k += PEEL_COMPLEX_ROW) + PEEL_COMPLEX_ROW <= remaining_depth); + MICRO_COMPLEX_ADD_PEEL_ROW + } + for(; k < depth; k++) + { + MICRO_COMPLEX_EXTRA_ROW(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real0, rhs_ptr_real1, rhs_ptr_real2, rhs_ptr_imag0, rhs_ptr_imag1, rhs_ptr_imag2, accReal0, accImag0); + } - bscalec(accReal, accImag, pAlphaReal, pAlphaImag, taccReal, taccImag); - bcouple_common(taccReal, taccImag, acc0, acc1); + constexpr bool full = (remaining_rows > accColsC); + bload(tRes, res, row, 0); + if ((accRows == 1) || (rows >= accCols)) + { + bscalec(accReal0, accImag0, pAlphaReal, pAlphaImag, taccReal, taccImag, pMask); + bcouple(taccReal, taccImag, tRes, acc0, acc1); + bstore(acc0, res, row + 0); + if (full) { + bstore(acc1, res, row + accColsC); + } + } else { + bscalec(accReal0, accImag0, pAlphaReal, pAlphaImag, taccReal, taccImag, pMask); + bcouple(taccReal, taccImag, tRes, acc0, acc1); if ((sizeof(Scalar) == sizeof(float)) && (remaining_rows == 1)) { - for(Index j = 0; j < 4; j++) { - res(row + 0, col + j) += pfirst(acc0.packet[j]); + for(Index j = 0; j < accRows; j++) { + res(row + 0, j) = pfirst(acc0.packet[j]); } } else { - for(Index j = 0; j < 4; j++) { - PacketBlock acc2; - acc2.packet[0] = res.template loadPacket(row + 0, col + j) + acc0.packet[j]; - res.template storePacketBlock(row + 0, col + j, acc2); - if(remaining_rows > accColsC) { - res(row + accColsC, col + j) += pfirst(acc1.packet[j]); + bstore(acc0, res, row + 0); + if (full) { + for(Index j = 0; j < accRows; j++) { + res(row + accColsC, j) = pfirst(acc1.packet[j]); } } } } } -#define MICRO_COMPLEX_UNROLL(func) \ - func(0) func(1) func(2) func(3) func(4) +#define MICRO_COMPLEX_EXTRA_ROWS(N) \ + 
gemm_unrolled_complex_row_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, rows, pAlphaReal, pAlphaImag, pMask); + +template +EIGEN_ALWAYS_INLINE void gemm_complex_extra_row( + const DataMapper& res, + const Scalar* lhs_base, + const Scalar* rhs_base, + Index depth, + Index strideA, + Index offsetA, + Index strideB, + Index row, + Index rows, + Index remaining_rows, + const Packet& pAlphaReal, + const Packet& pAlphaImag, + const Packet& pMask) +{ + MICRO_EXTRA(MICRO_COMPLEX_EXTRA_ROWS, remaining_rows, false) +} #define MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \ - MICRO_COMPLEX_UNROLL(func2); \ - func(0,peel) func(1,peel) func(2,peel) func(3,peel) func(4,peel) - -#define MICRO_COMPLEX_LOAD_ONE(iter) \ - if (unroll_factor > iter) { \ - lhsV##iter = ploadLhs(lhs_ptr_real##iter); \ - lhs_ptr_real##iter += accCols; \ - if(!LhsIsReal) { \ - lhsVi##iter = ploadLhs(lhs_ptr_imag##iter); \ - lhs_ptr_imag##iter += accCols; \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhsVi##iter); \ - } \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhsV##iter); \ - EIGEN_UNUSED_VARIABLE(lhsVi##iter); \ - } + MICRO_COMPLEX_UNROLL(func2); \ + func(0,peel) func(1,peel) func(2,peel) func(3,peel) #define MICRO_COMPLEX_WORK_ONE4(iter, peel) \ if (unroll_factor > iter) { \ - pgerc_common<4, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \ - } - -#define MICRO_COMPLEX_WORK_ONE1(iter, peel) \ - if (unroll_factor > iter) { \ - pgerc_common<1, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \ + pgerc_common(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \ } #define MICRO_COMPLEX_TYPE_PEEL4(func, func2, peel) \ if (PEEL_COMPLEX > peel) { \ - Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \ - Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \ - pbroadcast4_old(rhs_ptr_real + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \ - if(!RhsIsReal) { \ - pbroadcast4_old(rhs_ptr_imag + (accRows * peel), rhsVi##peel[0], rhsVi##peel[1], rhsVi##peel[2], rhsVi##peel[3]); \ - } else { \ - EIGEN_UNUSED_VARIABLE(rhsVi##peel); \ - } \ - MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \ - } else { \ - EIGEN_UNUSED_VARIABLE(rhsV##peel); \ - EIGEN_UNUSED_VARIABLE(rhsVi##peel); \ - } - -#define MICRO_COMPLEX_TYPE_PEEL1(func, func2, peel) \ - if (PEEL_COMPLEX > peel) { \ - Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \ - Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \ - rhsV##peel[0] = pset1(rhs_ptr_real[remaining_cols * peel]); \ - if(!RhsIsReal) { \ - rhsVi##peel[0] = pset1(rhs_ptr_imag[remaining_cols * peel]); \ - } else { \ - EIGEN_UNUSED_VARIABLE(rhsVi##peel); \ - } \ + Packet lhsV0, lhsV1, lhsV2, lhsV3; \ + Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3; \ + MICRO_COMPLEX_BROADCAST(peel) \ MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \ } else { \ EIGEN_UNUSED_VARIABLE(rhsV##peel); \ @@ -2069,42 +2049,27 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_row( } #define MICRO_COMPLEX_UNROLL_TYPE_PEEL(M, func, func1, func2) \ - Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M], rhsV8[M], rhsV9[M]; \ - Packet rhsVi0[M], rhsVi1[M], rhsVi2[M], rhsVi3[M], rhsVi4[M], rhsVi5[M], rhsVi6[M], rhsVi7[M], rhsVi8[M], rhsVi9[M]; \ - func(func1,func2,0); func(func1,func2,1); \ - func(func1,func2,2); func(func1,func2,3); \ - func(func1,func2,4); func(func1,func2,5); \ - 
func(func1,func2,6); func(func1,func2,7); \ - func(func1,func2,8); func(func1,func2,9); + Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M]; \ + Packet rhsVi0[M], rhsVi1[M], rhsVi2[M], rhsVi3[M]; \ + func(func1,func2,0) func(func1,func2,1) \ + func(func1,func2,2) func(func1,func2,3) #define MICRO_COMPLEX_UNROLL_TYPE_ONE(M, func, func1, func2) \ Packet rhsV0[M], rhsVi0[M];\ - func(func1,func2,0); + func(func1,func2,0) -#define MICRO_COMPLEX_ONE_PEEL4 \ - MICRO_COMPLEX_UNROLL_TYPE_PEEL(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \ - rhs_ptr_real += (accRows * PEEL_COMPLEX); \ - if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX); +#define MICRO_COMPLEX_UNROLL_TYPE(MICRO_COMPLEX_TYPE, size) \ + MICRO_COMPLEX_TYPE(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE) \ + MICRO_COMPLEX_ADD_ROWS(size, false) -#define MICRO_COMPLEX_ONE4 \ - MICRO_COMPLEX_UNROLL_TYPE_ONE(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \ - rhs_ptr_real += accRows; \ - if(!RhsIsReal) rhs_ptr_imag += accRows; +#define MICRO_COMPLEX_ONE_PEEL4 MICRO_COMPLEX_UNROLL_TYPE(MICRO_COMPLEX_UNROLL_TYPE_PEEL, PEEL_COMPLEX) -#define MICRO_COMPLEX_ONE_PEEL1 \ - MICRO_COMPLEX_UNROLL_TYPE_PEEL(1, MICRO_COMPLEX_TYPE_PEEL1, MICRO_COMPLEX_WORK_ONE1, MICRO_COMPLEX_LOAD_ONE); \ - rhs_ptr_real += (remaining_cols * PEEL_COMPLEX); \ - if(!RhsIsReal) rhs_ptr_imag += (remaining_cols * PEEL_COMPLEX); - -#define MICRO_COMPLEX_ONE1 \ - MICRO_COMPLEX_UNROLL_TYPE_ONE(1, MICRO_COMPLEX_TYPE_PEEL1, MICRO_COMPLEX_WORK_ONE1, MICRO_COMPLEX_LOAD_ONE); \ - rhs_ptr_real += remaining_cols; \ - if(!RhsIsReal) rhs_ptr_imag += remaining_cols; +#define MICRO_COMPLEX_ONE4 MICRO_COMPLEX_UNROLL_TYPE(MICRO_COMPLEX_UNROLL_TYPE_ONE, 1) #define MICRO_COMPLEX_DST_PTR_ONE(iter) \ if (unroll_factor > iter) { \ - bsetzero(accReal##iter); \ - bsetzero(accImag##iter); \ + bsetzero(accReal##iter); \ + bsetzero(accImag##iter); \ } else { \ EIGEN_UNUSED_VARIABLE(accReal##iter); \ EIGEN_UNUSED_VARIABLE(accImag##iter); \ @@ -2112,55 +2077,26 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_row( #define MICRO_COMPLEX_DST_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_DST_PTR_ONE) -#define MICRO_COMPLEX_SRC_PTR_ONE(iter) \ - if (unroll_factor > iter) { \ - lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols + accCols*offsetA; \ - if(!LhsIsReal) { \ - lhs_ptr_imag##iter = lhs_ptr_real##iter + accCols*strideA; \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \ - } \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \ - EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \ - } - #define MICRO_COMPLEX_SRC_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_SRC_PTR_ONE) -#define MICRO_COMPLEX_PREFETCH_ONE(iter) \ - if (unroll_factor > iter) { \ - EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \ - if(!LhsIsReal) { \ - EIGEN_POWER_PREFETCH(lhs_ptr_imag##iter); \ - } \ - } - #define MICRO_COMPLEX_PREFETCH MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_PREFETCH_ONE) #define MICRO_COMPLEX_STORE_ONE(iter) \ if (unroll_factor > iter) { \ - bload(tRes, res, row + iter*accCols, col); \ - bscalec(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \ - bcouple(taccReal, taccImag, tRes, acc0, acc1); \ - res.template storePacketBlock(row + iter*accCols + 0, col, acc0); \ - res.template storePacketBlock(row + iter*accCols + accColsC, col, acc1); \ + constexpr bool full = ((MICRO_NORMAL(iter)) || (accCols2 > accColsC)); \ + bload(tRes, res, row + iter*accCols, 0); \ + 
bscalec(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag, pMask); \ + bcouple(taccReal, taccImag, tRes, acc0, acc1); \ + bstore(acc0, res, row + iter*accCols + 0); \ + if (full) { \ + bstore(acc1, res, row + iter*accCols + accColsC); \ + } \ } #define MICRO_COMPLEX_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_STORE_ONE) -#define MICRO_COMPLEX_COL_STORE_ONE(iter) \ - if (unroll_factor > iter) { \ - bload(tRes, res, row + iter*accCols, col); \ - bscalec(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \ - bcouple(taccReal, taccImag, tRes, acc0, acc1); \ - res.template storePacketBlock(row + iter*accCols + 0, col, acc0); \ - res.template storePacketBlock(row + iter*accCols + accColsC, col, acc1); \ - } - -#define MICRO_COMPLEX_COL_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_COL_STORE_ONE) - -template -EIGEN_STRONG_INLINE void gemm_complex_unrolled_iteration( +template +EIGEN_ALWAYS_INLINE void gemm_complex_unrolled_iteration( const DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base, @@ -2169,37 +2105,30 @@ EIGEN_STRONG_INLINE void gemm_complex_unrolled_iteration( Index offsetA, Index strideB, Index& row, - Index col, const Packet& pAlphaReal, - const Packet& pAlphaImag) + const Packet& pAlphaImag, + const Packet& pMask) { - const Scalar* rhs_ptr_real = rhs_base; - const Scalar* rhs_ptr_imag; - if(!RhsIsReal) { - rhs_ptr_imag = rhs_base + accRows*strideB; - } else { - EIGEN_UNUSED_VARIABLE(rhs_ptr_imag); - } - const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_imag0 = NULL, * lhs_ptr_real1 = NULL, * lhs_ptr_imag1 = NULL; - const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_imag2 = NULL, * lhs_ptr_real3 = NULL, * lhs_ptr_imag3 = NULL; - const Scalar* lhs_ptr_real4 = NULL, * lhs_ptr_imag4 = NULL; - PacketBlock accReal0, accImag0, accReal1, accImag1; - PacketBlock accReal2, accImag2, accReal3, accImag3; - PacketBlock accReal4, accImag4; - PacketBlock taccReal, taccImag; - PacketBlock acc0, acc1; - PacketBlock tRes; + const Scalar* rhs_ptr_real0 = rhs_base, * rhs_ptr_real1 = NULL, * rhs_ptr_real2 = NULL; + const Scalar* rhs_ptr_imag0 = NULL, * rhs_ptr_imag1 = NULL, * rhs_ptr_imag2 = NULL; + const Index imag_delta = accCols*strideA; + const Index imag_delta2 = accCols2*strideA; + const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_real1 = NULL; + const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_real3 = NULL; + PacketBlock accReal0, accImag0, accReal1, accImag1; + PacketBlock accReal2, accImag2, accReal3, accImag3; + PacketBlock taccReal, taccImag; + PacketBlock acc0, acc1; + PacketBlock tRes; + MICRO_COMPLEX_SRC2_PTR MICRO_COMPLEX_SRC_PTR MICRO_COMPLEX_DST_PTR Index k = 0; for(; k + PEEL_COMPLEX <= depth; k+= PEEL_COMPLEX) { - EIGEN_POWER_PREFETCH(rhs_ptr_real); - if(!RhsIsReal) { - EIGEN_POWER_PREFETCH(rhs_ptr_imag); - } + MICRO_COMPLEX_PREFETCHN(accRows) MICRO_COMPLEX_PREFETCH MICRO_COMPLEX_ONE_PEEL4 } @@ -2209,122 +2138,107 @@ EIGEN_STRONG_INLINE void gemm_complex_unrolled_iteration( } MICRO_COMPLEX_STORE - row += unroll_factor*accCols; + MICRO_COMPLEX_UPDATE } -template -EIGEN_STRONG_INLINE void gemm_complex_unrolled_col_iteration( +#define MICRO_COMPLEX_UNROLL_ITER2(N, M) \ + gemm_complex_unrolled_iteration(res3, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, pAlphaReal, pAlphaImag, pMask); \ + if (M) return; + +template +EIGEN_ALWAYS_INLINE void gemm_complex_cols( const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, + const Scalar* blockA, + const Scalar* blockB, Index depth, Index strideA, Index offsetA, Index strideB, - 
Index& row, + Index offsetB, Index col, - Index remaining_cols, - const Packet& pAlphaReal, - const Packet& pAlphaImag) -{ - const Scalar* rhs_ptr_real = rhs_base; - const Scalar* rhs_ptr_imag; - if(!RhsIsReal) { - rhs_ptr_imag = rhs_base + remaining_cols*strideB; - } else { - EIGEN_UNUSED_VARIABLE(rhs_ptr_imag); - } - const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_imag0 = NULL, * lhs_ptr_real1 = NULL, * lhs_ptr_imag1 = NULL; - const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_imag2 = NULL, * lhs_ptr_real3 = NULL, * lhs_ptr_imag3 = NULL; - const Scalar* lhs_ptr_real4 = NULL, * lhs_ptr_imag4 = NULL; - PacketBlock accReal0, accImag0, accReal1, accImag1; - PacketBlock accReal2, accImag2, accReal3, accImag3; - PacketBlock accReal4, accImag4; - PacketBlock taccReal, taccImag; - PacketBlock acc0, acc1; - PacketBlock tRes; - - MICRO_COMPLEX_SRC_PTR - MICRO_COMPLEX_DST_PTR - - Index k = 0; - for(; k + PEEL_COMPLEX <= depth; k+= PEEL_COMPLEX) - { - EIGEN_POWER_PREFETCH(rhs_ptr_real); - if(!RhsIsReal) { - EIGEN_POWER_PREFETCH(rhs_ptr_imag); - } - MICRO_COMPLEX_PREFETCH - MICRO_COMPLEX_ONE_PEEL1 - } - for(; k < depth; k++) - { - MICRO_COMPLEX_ONE1 - } - MICRO_COMPLEX_COL_STORE - - row += unroll_factor*accCols; -} - -template -EIGEN_STRONG_INLINE void gemm_complex_unrolled_col( - const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index strideB, - Index& row, Index rows, - Index col, - Index remaining_cols, + Index remaining_rows, const Packet& pAlphaReal, - const Packet& pAlphaImag) + const Packet& pAlphaImag, + const Packet& pMask) { -#define MAX_COMPLEX_UNROLL 3 + const DataMapper res3 = res.getSubMapper(0, col); + + const Scalar* rhs_base = blockB + advanceCols*col*strideB + MICRO_NEW_ROWS*offsetB; + const Scalar* lhs_base = blockA + accCols*offsetA; + Index row = 0; + +#define MAX_COMPLEX_UNROLL 4 while(row + MAX_COMPLEX_UNROLL*accCols <= rows) { - gemm_complex_unrolled_col_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag); + MICRO_COMPLEX_UNROLL_ITER2(MAX_COMPLEX_UNROLL, 0); } switch( (rows-row)/accCols ) { #if MAX_COMPLEX_UNROLL > 4 - case 4: - gemm_complex_unrolled_col_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag); - break; + case 4: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 4) + break; #endif #if MAX_COMPLEX_UNROLL > 3 - case 3: - gemm_complex_unrolled_col_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag); - break; + case 3: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 3) + break; #endif #if MAX_COMPLEX_UNROLL > 2 - case 2: - gemm_complex_unrolled_col_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag); - break; + case 2: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 2) + break; #endif #if MAX_COMPLEX_UNROLL > 1 - case 1: - gemm_complex_unrolled_col_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, 
offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag); - break; + case 1: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 1) + break; #endif - default: - break; + default: + break; } #undef MAX_COMPLEX_UNROLL + + if(remaining_rows > 0) + { + gemm_complex_extra_row(res3, blockA, rhs_base, depth, strideA, offsetA, strideB, row, rows, remaining_rows, pAlphaReal, pAlphaImag, pMask); + } } -template +#define MICRO_COMPLEX_EXTRA_COLS(N) \ + gemm_complex_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, remaining_rows, pAlphaReal, pAlphaImag, pMask); + +template +EIGEN_STRONG_INLINE void gemm_complex_extra_cols( + const DataMapper& res, + const Scalar* blockA, + const Scalar* blockB, + Index depth, + Index strideA, + Index offsetA, + Index strideB, + Index offsetB, + Index col, + Index rows, + Index cols, + Index remaining_rows, + const Packet& pAlphaReal, + const Packet& pAlphaImag, + const Packet& pMask) +{ + MICRO_EXTRA(MICRO_COMPLEX_EXTRA_COLS, cols-col, true) +} + +template EIGEN_STRONG_INLINE void gemm_complex(const DataMapper& res, const LhsScalar* blockAc, const RhsScalar* blockBc, Index rows, Index depth, Index cols, Scalarc alpha, Index strideA, Index strideB, Index offsetA, Index offsetB) { const Index remaining_rows = rows % accCols; - const Index remaining_cols = cols % accRows; if( strideA == -1 ) strideA = depth; if( strideB == -1 ) strideB = depth; const Packet pAlphaReal = pset1(alpha.real()); const Packet pAlphaImag = pset1(alpha.imag()); - const Packet pMask = bmask((const int)(remaining_rows)); + const Packet pMask = bmask(remaining_rows); const Scalar* blockA = (Scalar *) blockAc; const Scalar* blockB = (Scalar *) blockBc; @@ -2332,63 +2246,12 @@ EIGEN_STRONG_INLINE void gemm_complex(const DataMapper& res, const LhsScalar* bl Index col = 0; for(; col + accRows <= cols; col += accRows) { - const Scalar* rhs_base = blockB + advanceCols*col*strideB + accRows*offsetB; - const Scalar* lhs_base = blockA; - Index row = 0; - -#define MAX_COMPLEX_UNROLL 3 - while(row + MAX_COMPLEX_UNROLL*accCols <= rows) { - gemm_complex_unrolled_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - } - switch( (rows-row)/accCols ) { -#if MAX_COMPLEX_UNROLL > 4 - case 4: - gemm_complex_unrolled_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif -#if MAX_COMPLEX_UNROLL > 3 - case 3: - gemm_complex_unrolled_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif -#if MAX_COMPLEX_UNROLL > 2 - case 2: - gemm_complex_unrolled_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif -#if MAX_COMPLEX_UNROLL > 1 - case 1: - gemm_complex_unrolled_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif - default: - break; - } -#undef MAX_COMPLEX_UNROLL - - if(remaining_rows > 0) - { - 
gemm_complex_extra_row(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask); - } + gemm_complex_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, remaining_rows, pAlphaReal, pAlphaImag, pMask); } - if(remaining_cols > 0) + if (col != cols) { - const Scalar* rhs_base = blockB + advanceCols*col*strideB + remaining_cols*offsetB; - const Scalar* lhs_base = blockA; - - for(; col < cols; col++) - { - Index row = 0; - - gemm_complex_unrolled_col(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, rows, col, remaining_cols, pAlphaReal, pAlphaImag); - - if (remaining_rows > 0) - { - gemm_complex_extra_col(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_rows, remaining_cols, pAlphaReal, pAlphaImag); - } - rhs_base++; - } + gemm_complex_extra_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask); } } @@ -2396,6 +2259,8 @@ EIGEN_STRONG_INLINE void gemm_complex(const DataMapper& res, const LhsScalar* bl #undef advanceCols #undef advanceRows +#include "MatrixVectorProduct.h" + /************************************ * ppc64le template specializations * * **********************************/ @@ -2409,7 +2274,7 @@ template ::operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2423,7 +2288,7 @@ template ::operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2438,7 +2303,7 @@ template ::operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockB, rhs, depth, cols, stride, offset); } @@ -2452,7 +2317,7 @@ template ::operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockB, rhs, depth, cols, stride, offset); } #endif @@ -2467,7 +2332,7 @@ template ::operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2481,7 +2346,7 @@ template ::operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2495,7 +2360,7 @@ template, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode> ::operator()(std::complex* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2509,7 +2374,7 @@ template, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode> ::operator()(std::complex* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2524,7 +2389,7 @@ template ::operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockB, rhs, depth, cols, stride, offset); } @@ -2538,7 +2403,7 @@ template ::operator()(float* blockB, const 
DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_pack pack; + dhs_pack pack; pack(blockB, rhs, depth, cols, stride, offset); } #endif @@ -2553,7 +2418,7 @@ template, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode> ::operator()(std::complex* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockB, rhs, depth, cols, stride, offset); } @@ -2567,7 +2432,7 @@ template, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode> ::operator()(std::complex* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockB, rhs, depth, cols, stride, offset); } @@ -2581,7 +2446,7 @@ template, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode> ::operator()(std::complex* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2595,7 +2460,7 @@ template, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode> ::operator()(std::complex* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockA, lhs, depth, rows, stride, offset); } @@ -2609,7 +2474,7 @@ template, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode> ::operator()(std::complex* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockB, rhs, depth, cols, stride, offset); } @@ -2623,7 +2488,7 @@ template, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode> ::operator()(std::complex* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset) { - dhs_cpack pack; + dhs_cpack pack; pack(blockB, rhs, depth, cols, stride, offset); } @@ -2649,20 +2514,20 @@ void gebp_kernel::size; void (*gemm_function)(const DataMapper&, const float*, const float*, Index, Index, Index, float, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY + #if defined(EIGEN_ALTIVEC_MMA_ONLY) //generate with MMA only - gemm_function = &Eigen::internal::gemmMMA; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) + gemm_function = &Eigen::internal::gemmMMA; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemmMMA; + gemm_function = &Eigen::internal::gemmMMA; } else{ - gemm_function = &Eigen::internal::gemm; + gemm_function = &Eigen::internal::gemm; } #else - gemm_function = &Eigen::internal::gemm; + gemm_function = &Eigen::internal::gemm; #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2688,20 +2553,20 @@ void gebp_kernel, std::complex, Index, DataMapper, mr void (*gemm_function)(const DataMapper&, const std::complex*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports 
("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2726,20 +2591,20 @@ void gebp_kernel, Index, DataMapper, mr, nr, Conjugat const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const float*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + 
gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2764,20 +2629,20 @@ void gebp_kernel, float, Index, DataMapper, mr, nr, Conjugat const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const std::complex*, const float*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2801,20 +2666,20 @@ void gebp_kernel::size; void (*gemm_function)(const DataMapper&, const double*, const double*, Index, Index, Index, double, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY + #if defined(EIGEN_ALTIVEC_MMA_ONLY) //generate with MMA only - gemm_function = &Eigen::internal::gemmMMA; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) + gemm_function = &Eigen::internal::gemmMMA; + #elif 
defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemmMMA; + gemm_function = &Eigen::internal::gemmMMA; } else{ - gemm_function = &Eigen::internal::gemm; + gemm_function = &Eigen::internal::gemm; } #else - gemm_function = &Eigen::internal::gemm; + gemm_function = &Eigen::internal::gemm; #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2839,20 +2704,20 @@ void gebp_kernel, std::complex, Index, DataMapper, const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const std::complex*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2877,20 +2742,20 @@ void gebp_kernel, double, Index, DataMapper, mr, nr, Conjug const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const std::complex*, const double*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, 
accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2915,21 +2780,46 @@ void gebp_kernel, Index, DataMapper, mr, nr, Conjug const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const double*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #elif 
defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } + +#if defined(__MMA__) +template +struct gebp_kernel +{ + typedef typename quad_traits::vectortype Packet; + typedef typename quad_traits::rhstype RhsPacket; + + void operator()(const DataMapper& res, const bfloat16* blockA, const bfloat16* blockB, + Index rows, Index depth, Index cols, bfloat16 alpha, + Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0); +}; + +template +void gebp_kernel + ::operator()(const DataMapper& res, const bfloat16* blockA, const bfloat16* blockB, + Index rows, Index depth, Index cols, bfloat16 alpha, + Index strideA, Index strideB, Index offsetA, Index offsetB) + { + const Index accRows = quad_traits::rows; + const Index accCols = quad_traits::size; + + Eigen::internal::gemmMMAbfloat16(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + } +#endif } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h index 33d5434..28868ca 100644 --- a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h @@ -5,33 +5,41 @@ #define EIGEN_POWER_PREFETCH(p) #endif +#ifdef _ARCH_PWR9 +#define USE_PARTIAL_PACKETS +#endif + +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template -EIGEN_STRONG_INLINE void gemm_extra_col( +template +EIGEN_ALWAYS_INLINE void gemm_extra_row( const DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base, Index depth, Index strideA, Index offsetA, + Index strideB, Index row, - Index col, + Index rows, Index remaining_rows, - Index remaining_cols, - const Packet& pAlpha); + const Packet& pAlpha, + const Packet& pMask); -template -EIGEN_STRONG_INLINE void gemm_extra_row( +template +EIGEN_STRONG_INLINE void gemm_extra_cols( const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, + const Scalar* blockA, + const Scalar* blockB, Index depth, Index strideA, Index offsetA, - Index row, + Index strideB, + Index offsetB, Index col, Index rows, Index cols, @@ -39,25 +47,11 @@ EIGEN_STRONG_INLINE void gemm_extra_row( const Packet& pAlpha, const Packet& pMask); -template -EIGEN_STRONG_INLINE void gemm_unrolled_col( - const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index& row, - Index rows, - Index col, - Index remaining_cols, - const Packet& pAlpha); - template -EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows); +EIGEN_ALWAYS_INLINE Packet bmask(const Index remaining_rows); -template -EIGEN_STRONG_INLINE void gemm_complex_extra_col( +template +EIGEN_ALWAYS_INLINE void gemm_complex_extra_row( const 
DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base, @@ -66,22 +60,22 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_col( Index offsetA, Index strideB, Index row, - Index col, + Index rows, Index remaining_rows, - Index remaining_cols, const Packet& pAlphaReal, - const Packet& pAlphaImag); + const Packet& pAlphaImag, + const Packet& pMask); -template -EIGEN_STRONG_INLINE void gemm_complex_extra_row( +template +EIGEN_STRONG_INLINE void gemm_complex_extra_cols( const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, + const Scalar* blockA, + const Scalar* blockB, Index depth, Index strideA, Index offsetA, Index strideB, - Index row, + Index offsetB, Index col, Index rows, Index cols, @@ -90,132 +84,133 @@ EIGEN_STRONG_INLINE void gemm_complex_extra_row( const Packet& pAlphaImag, const Packet& pMask); -template -EIGEN_STRONG_INLINE void gemm_complex_unrolled_col( - const DataMapper& res, - const Scalar* lhs_base, - const Scalar* rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index strideB, - Index& row, - Index rows, - Index col, - Index remaining_cols, - const Packet& pAlphaReal, - const Packet& pAlphaImag); - -template -EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs); - -template -EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col); - -template -EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col); - template -EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha); +EIGEN_ALWAYS_INLINE Packet ploadLhs(const __UNPACK_TYPE__(Packet)* lhs); + +template +EIGEN_ALWAYS_INLINE void bload(PacketBlock& acc, const DataMapper& res, Index row, Index col); + +template +EIGEN_ALWAYS_INLINE void bstore(PacketBlock& acc, const DataMapper& res, Index row); + +#ifdef USE_PARTIAL_PACKETS +template +EIGEN_ALWAYS_INLINE void bload_partial(PacketBlock& acc, const DataMapper& res, Index row, Index elements); + +template +EIGEN_ALWAYS_INLINE void bstore_partial(PacketBlock& acc, const DataMapper& res, Index row, Index elements); +#endif template -EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag); +EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha); -const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3, - 16, 17, 18, 19, - 4, 5, 6, 7, - 20, 21, 22, 23}; +template +EIGEN_ALWAYS_INLINE void bscale(PacketBlock& acc, PacketBlock& accZ, const Packet& pAlpha, const Packet& pMask); -const static Packet16uc p16uc_SETCOMPLEX32_SECOND = { 8, 9, 10, 11, - 24, 25, 26, 27, - 12, 13, 14, 15, - 28, 29, 30, 31}; -//[a,b],[ai,bi] = [a,ai] - This is equivalent to p16uc_GETREAL64 -const static Packet16uc p16uc_SETCOMPLEX64_FIRST = { 0, 1, 2, 3, 4, 5, 6, 7, - 16, 17, 18, 19, 20, 21, 22, 23}; +template +EIGEN_ALWAYS_INLINE void bscalec(PacketBlock& aReal, PacketBlock& aImag, const Packet& bReal, const Packet& bImag, PacketBlock& cReal, PacketBlock& cImag, const Packet& pMask); -//[a,b],[ai,bi] = [b,bi] - This is equivalent to p16uc_GETIMAG64 -const static Packet16uc p16uc_SETCOMPLEX64_SECOND = { 8, 9, 10, 11, 12, 13, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31}; +template +EIGEN_ALWAYS_INLINE void bcouple(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& tRes, PacketBlock& acc1, PacketBlock& acc2); +#define MICRO_NORMAL(iter) \ + (accCols == accCols2) || (unroll_factor != (iter + 1)) -// Grab two decouples 
real/imaginary PacketBlocks and return two coupled (real/imaginary pairs) PacketBlocks. -template -EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& acc1, PacketBlock& acc2) -{ - acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST); - acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_FIRST); - acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_FIRST); - acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_FIRST); +#define MICRO_UNROLL_ITER1(func, N) \ + switch (remaining_rows) { \ + default: \ + func(N, 0) \ + break; \ + case 1: \ + func(N, 1) \ + break; \ + case 2: \ + if (sizeof(Scalar) == sizeof(float)) { \ + func(N, 2) \ + } \ + break; \ + case 3: \ + if (sizeof(Scalar) == sizeof(float)) { \ + func(N, 3) \ + } \ + break; \ + } - acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND); - acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_SECOND); - acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_SECOND); - acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_SECOND); -} +#ifdef USE_PARTIAL_PACKETS +#define MICRO_UNROLL_ITER(func, N) \ + if (remaining_rows) { \ + func(N, true); \ + } else { \ + func(N, false); \ + } -template -EIGEN_ALWAYS_INLINE void bcouple(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& tRes, PacketBlock& acc1, PacketBlock& acc2) -{ - bcouple_common(taccReal, taccImag, acc1, acc2); +#define MICRO_NORMAL_PARTIAL(iter) \ + full || (unroll_factor != (iter + 1)) +#else +#define MICRO_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N) +#endif - acc1.packet[0] = padd(tRes.packet[0], acc1.packet[0]); - acc1.packet[1] = padd(tRes.packet[1], acc1.packet[1]); - acc1.packet[2] = padd(tRes.packet[2], acc1.packet[2]); - acc1.packet[3] = padd(tRes.packet[3], acc1.packet[3]); +#define MICRO_COMPLEX_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N) - acc2.packet[0] = padd(tRes.packet[4], acc2.packet[0]); - acc2.packet[1] = padd(tRes.packet[5], acc2.packet[1]); - acc2.packet[2] = padd(tRes.packet[6], acc2.packet[2]); - acc2.packet[3] = padd(tRes.packet[7], acc2.packet[3]); -} +#define MICRO_NORMAL_COLS(iter, a, b) ((MICRO_NORMAL(iter)) ? 
a : b) -template -EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& acc1, PacketBlock& acc2) -{ - acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST); +#define MICRO_LOAD1(lhs_ptr, iter) \ + if (unroll_factor > iter) { \ + lhsV##iter = ploadLhs(lhs_ptr##iter); \ + lhs_ptr##iter += MICRO_NORMAL_COLS(iter, accCols, accCols2); \ + } else { \ + EIGEN_UNUSED_VARIABLE(lhsV##iter); \ + } - acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND); -} +#define MICRO_LOAD_ONE(iter) MICRO_LOAD1(lhs_ptr, iter) -template -EIGEN_ALWAYS_INLINE void bcouple(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& tRes, PacketBlock& acc1, PacketBlock& acc2) -{ - bcouple_common(taccReal, taccImag, acc1, acc2); +#define MICRO_COMPLEX_LOAD_ONE(iter) \ + if (!LhsIsReal && (unroll_factor > iter)) { \ + lhsVi##iter = ploadLhs(lhs_ptr_real##iter + MICRO_NORMAL_COLS(iter, imag_delta, imag_delta2)); \ + } else { \ + EIGEN_UNUSED_VARIABLE(lhsVi##iter); \ + } \ + MICRO_LOAD1(lhs_ptr_real, iter) \ - acc1.packet[0] = padd(tRes.packet[0], acc1.packet[0]); +#define MICRO_SRC_PTR1(lhs_ptr, advRows, iter) \ + if (unroll_factor > iter) { \ + lhs_ptr##iter = lhs_base + (row+(iter*accCols))*strideA*advRows - MICRO_NORMAL_COLS(iter, 0, (accCols-accCols2)*offsetA); \ + } else { \ + EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \ + } - acc2.packet[0] = padd(tRes.packet[1], acc2.packet[0]); -} +#define MICRO_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr, 1, iter) -template<> -EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& acc1, PacketBlock& acc2) -{ - acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST); - acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_FIRST); - acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_FIRST); - acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_FIRST); +#define MICRO_COMPLEX_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr_real, advanceRows, iter) - acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND); - acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_SECOND); - acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_SECOND); - acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_SECOND); -} +#define MICRO_PREFETCH1(lhs_ptr, iter) \ + if (unroll_factor > iter) { \ + EIGEN_POWER_PREFETCH(lhs_ptr##iter); \ + } -template<> -EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock& taccReal, PacketBlock& taccImag, PacketBlock& acc1, PacketBlock& acc2) -{ - acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST); +#define MICRO_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr, iter) - acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND); -} +#define MICRO_COMPLEX_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr_real, iter) + +#ifdef USE_PARTIAL_PACKETS +#define MICRO_UPDATE_MASK +#else +#define MICRO_UPDATE_MASK EIGEN_UNUSED_VARIABLE(pMask); +#endif + +#define MICRO_UPDATE \ + if (accCols == accCols2) { \ + MICRO_UPDATE_MASK \ + EIGEN_UNUSED_VARIABLE(offsetA); \ + row += unroll_factor*accCols; \ + } + +#define MICRO_COMPLEX_UPDATE \ + MICRO_UPDATE \ + if(LhsIsReal || (accCols == accCols2)) { \ 
+ EIGEN_UNUSED_VARIABLE(imag_delta2); \ + } -// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled. -template -EIGEN_ALWAYS_INLINE Packet ploadRhs(const Scalar* rhs) -{ - return ploadu(rhs); -} } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h index 6540c6f..e4013a7 100644 --- a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h @@ -11,56 +11,87 @@ #ifndef EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H #define EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H -#pragma GCC target("cpu=power10") +// If using dynamic dispatch, set the CPU target. +#if defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) +#pragma GCC push_options +#pragma GCC target("cpu=power10,htm") +#endif #ifdef __has_builtin #if !__has_builtin(__builtin_vsx_assemble_pair) #define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair #endif +#if !__has_builtin(__builtin_vsx_disassemble_pair) +#define __builtin_vsx_disassemble_pair __builtin_mma_disassemble_pair #endif +#endif + +#include "../../InternalHeaderCheck.h" + +#include "MatrixProductMMAbfloat16.h" namespace Eigen { namespace internal { -template +#define accColsC (accCols / 2) + EIGEN_ALWAYS_INLINE void bsetzeroMMA(__vector_quad* acc) { __builtin_mma_xxsetaccz(acc); } -template -EIGEN_ALWAYS_INLINE void storeAccumulator(Index i, Index j, const DataMapper& data, const Packet& alpha, __vector_quad* acc) +#ifdef USE_PARTIAL_PACKETS +template +EIGEN_ALWAYS_INLINE void storeAccumulator(Index i, const DataMapper& data, const Packet& alpha, const Index elements, __vector_quad* acc) +#else +template +EIGEN_ALWAYS_INLINE void storeAccumulator(Index i, const DataMapper& data, const Packet& alpha, const Packet& pMask, __vector_quad* acc) +#endif { PacketBlock result; __builtin_mma_disassemble_acc(&result.packet, acc); PacketBlock tRes; - bload(tRes, data, i, j); - - bscale(tRes, result, alpha); - - data.template storePacketBlock(i, j, tRes); +#ifdef USE_PARTIAL_PACKETS + if (full) { + EIGEN_UNUSED_VARIABLE(elements); + bload(tRes, data, i, 0); + bscale(tRes, result, alpha); + bstore(tRes, data, i); + } else { + bload_partial(tRes, data, i, elements); + bscale(tRes, result, alpha); + bstore_partial(tRes, data, i, elements); + } +#else + bload(tRes, data, i, 0); + bscale(tRes, result, alpha, pMask); + bstore(tRes, data, i); +#endif } -template -EIGEN_ALWAYS_INLINE void storeComplexAccumulator(Index i, Index j, const DataMapper& data, const Packet& alphaReal, const Packet& alphaImag, __vector_quad* accReal, __vector_quad* accImag) +template +EIGEN_ALWAYS_INLINE void storeComplexAccumulator(Index i, const DataMapper& data, const Packet& alphaReal, const Packet& alphaImag, const Packet& pMask, __vector_quad* accReal, __vector_quad* accImag) { + constexpr bool full = (accCols2 > accColsC); PacketBlock resultReal, resultImag; __builtin_mma_disassemble_acc(&resultReal.packet, accReal); __builtin_mma_disassemble_acc(&resultImag.packet, accImag); PacketBlock tRes; - bload(tRes, data, i, j); + bload(tRes, data, i, 0); - PacketBlock taccReal, taccImag; - bscalec(resultReal, resultImag, alphaReal, alphaImag, taccReal, taccImag); + PacketBlock taccReal, taccImag; + bscalec(resultReal, resultImag, alphaReal, alphaImag, taccReal, taccImag, pMask); PacketBlock acc1, acc2; - bcouple(taccReal, taccImag, tRes, acc1, acc2); + bcouple(taccReal, taccImag, tRes, acc1, acc2); - 
data.template storePacketBlock(i + N*accColsC, j, acc1); - data.template storePacketBlock(i + (N+1)*accColsC, j, acc2); + bstore(acc1, data, i); + if (full) { + bstore(acc2, data, i + accColsC); + } } // Defaults to float32, since Eigen still supports C++03 we can't use default template arguments @@ -75,18 +106,6 @@ EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const RhsPacket& a, const L } } -template -EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const PacketBlock& a, const Packet2d& b) -{ - __vector_pair* a0 = (__vector_pair *)(&a.packet[0]); - if(NegativeAccumulate) - { - __builtin_mma_xvf64gernp(acc, *a0, (__vector unsigned char)b); - } else { - __builtin_mma_xvf64gerpp(acc, *a0, (__vector unsigned char)b); - } -} - template EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const __vector_pair& a, const Packet2d& b) { @@ -98,18 +117,13 @@ EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const __vector_pair& a, con } } -template -EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad*, const __vector_pair&, const Packet4f&) -{ - // Just for compilation -} - -template -EIGEN_ALWAYS_INLINE void pgercMMA(__vector_quad* accReal, __vector_quad* accImag, const Packet& lhsV, const Packet& lhsVi, const RhsPacket& rhsV, const RhsPacket& rhsVi) +template +EIGEN_ALWAYS_INLINE void pgercMMA(__vector_quad* accReal, __vector_quad* accImag, const Packet& lhsV, Packet& lhsVi, const RhsPacket& rhsV, RhsPacket& rhsVi) { pgerMMA(accReal, rhsV, lhsV); if(LhsIsReal) { pgerMMA(accImag, rhsVi, lhsV); + EIGEN_UNUSED_VARIABLE(lhsVi); } else { if(!RhsIsReal) { pgerMMA(accReal, rhsVi, lhsVi); @@ -122,129 +136,178 @@ EIGEN_ALWAYS_INLINE void pgercMMA(__vector_quad* accReal, __vector_quad* accImag } // This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled. +template +EIGEN_ALWAYS_INLINE Packet ploadRhs(const __UNPACK_TYPE__(Packet)* rhs) +{ + return ploadu(rhs); +} + template EIGEN_ALWAYS_INLINE void ploadRhsMMA(const Scalar* rhs, Packet& rhsV) { - rhsV = ploadRhs((const Scalar*)(rhs)); + rhsV = ploadRhs(rhs); } template<> -EIGEN_ALWAYS_INLINE void ploadRhsMMA >(const double* rhs, PacketBlock& rhsV) -{ - rhsV.packet[0] = ploadRhs((const double *)((Packet2d *)rhs )); - rhsV.packet[1] = ploadRhs((const double *)(((Packet2d *)rhs) + 1)); -} - -template<> -EIGEN_ALWAYS_INLINE void ploadRhsMMA(const double* rhs, __vector_pair& rhsV) +EIGEN_ALWAYS_INLINE void ploadRhsMMA(const double* rhs, __vector_pair& rhsV) { #if EIGEN_COMP_LLVM __builtin_vsx_assemble_pair(&rhsV, - (__vector unsigned char)(ploadRhs((const double *)(((Packet2d *)rhs) + 1))), - (__vector unsigned char)(ploadRhs((const double *)((Packet2d *)rhs )))); + reinterpret_cast<__vector unsigned char>(ploadRhs(rhs + (sizeof(Packet2d) / sizeof(double)))), + reinterpret_cast<__vector unsigned char>(ploadRhs(rhs))); #else - __asm__ ("lxvp %x0,%1" : "=wa" (rhsV) : "Y" (*rhs)); + rhsV = *reinterpret_cast<__vector_pair *>(const_cast(rhs)); #endif } -template<> -EIGEN_ALWAYS_INLINE void ploadRhsMMA(const float*, __vector_pair&) +EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV) { - // Just for compilation + ploadRhsMMA(lhs, lhsV); } +#if (EIGEN_COMP_LLVM || (__GNUC__ >= 11)) +#define VECTOR_PAIR_LOADS_LHS +#endif + // PEEL_MMA loop factor. 
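
A minimal sketch of the three-way kernel selection this patch applies across the gebp_kernel specializations above: EIGEN_ALTIVEC_MMA_ONLY pins the MMA path at build time, EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH probes the CPU at run time via __builtin_cpu_supports, and otherwise the plain VSX path is used. It assumes GCC on PowerPC; kernel_mma and kernel_generic are hypothetical stand-ins for gemmMMA and gemm.

#include <cstdio>

using kernel_fn = void (*)();

static void kernel_generic() { std::puts("generic VSX kernel"); }
static void kernel_mma()     { std::puts("POWER10 MMA kernel"); }

kernel_fn pick_kernel()
{
#if defined(EIGEN_ALTIVEC_MMA_ONLY)
  return &kernel_mma;                 // MMA unconditionally compiled in
#elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
  // ISA 3.1 and MMA capability bits are exposed at run time by GCC/glibc.
  if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma"))
    return &kernel_mma;
  return &kernel_generic;
#else
  return &kernel_generic;             // MMA disabled at build time
#endif
}
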
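
The bsetzeroMMA/pgerMMA/storeAccumulator trio above wraps the POWER10 accumulator lifecycle, and ploadRhsMMA shows double-precision operands travelling as 256-bit register pairs (the patch aliases __builtin_vsx_assemble_pair to __builtin_mma_assemble_pair where a compiler lacks it). A minimal standalone sketch, assuming a POWER10 target (e.g. -mcpu=power10); names and operand layout are illustrative, with the pair operand order mirroring the little-endian layout the patch assumes.

#include <altivec.h>

// Accumulate one 4x2 double tile: out += a (4x1) * b (1x2).
void f64_tile_update(const double* a, const double* b, double out[4][2])
{
  __vector_quad acc;
  __builtin_mma_xxsetaccz(&acc);                       // zero the accumulator

  // Four lhs doubles travel as one register pair, as in ploadRhsMMA above.
  __vector_pair pa;
  __builtin_vsx_assemble_pair(&pa,
      (__vector unsigned char)vec_xl(16, a),
      (__vector unsigned char)vec_xl(0, a));

  __vector double vb = vec_xl(0, b);
  __builtin_mma_xvf64gerpp(&acc, pa, (__vector unsigned char)vb);

  __builtin_mma_disassemble_acc(out, &acc);            // four rows of 2 doubles
}
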
#define PEEL_MMA 7 #define MICRO_MMA_UNROLL(func) \ func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7) -#define MICRO_MMA_LOAD_ONE(iter) \ - if (unroll_factor > iter) { \ - lhsV##iter = ploadLhs(lhs_ptr##iter); \ - lhs_ptr##iter += accCols; \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhsV##iter); \ - } +#define MICRO_MMA_WORK(func, type, peel) \ + func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) \ + func(4,type,peel) func(5,type,peel) func(6,type,peel) func(7,type,peel) #define MICRO_MMA_WORK_ONE(iter, type, peel) \ if (unroll_factor > iter) { \ - pgerMMA(&accZero##iter, rhsV##peel, lhsV##iter); \ + pgerMMA(&accZero##iter, rhsV[peel], lhsV##iter); \ } -#define MICRO_MMA_TYPE_PEEL(func, func2, type, peel) \ +#ifdef VECTOR_PAIR_LOADS_LHS +#define MICRO_MMA_WORK_TWO(iter, type, peel) \ + if (unroll_factor > iter) { \ + pgerMMA(&accZero##iter, rhsV[peel], lhsV2##iter.packet[peel & 1]); \ + } + +#define MICRO_MMA_LOAD1_TWO(lhs_ptr, iter) \ + if (unroll_factor > iter) { \ + if (MICRO_NORMAL(iter)) { \ + ploadLhsMMA(reinterpret_cast(lhs_ptr##iter), plhsV##iter); \ + __builtin_vsx_disassemble_pair(reinterpret_cast(&lhsV2##iter.packet), &plhsV##iter); \ + lhs_ptr##iter += accCols*2; \ + } else { \ + lhsV2##iter.packet[0] = ploadLhs(lhs_ptr##iter); \ + lhsV2##iter.packet[1] = ploadLhs(lhs_ptr##iter + accCols2); \ + lhs_ptr##iter += accCols2*2; \ + EIGEN_UNUSED_VARIABLE(plhsV##iter) \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(lhsV2##iter); \ + EIGEN_UNUSED_VARIABLE(plhsV##iter) \ + } + +#define MICRO_MMA_LOAD_TWO(iter) MICRO_MMA_LOAD1_TWO(lhs_ptr, iter) +#endif + +#define MICRO_MMA_TYPE_PEEL(funcw, funcl, type, peel) \ if (PEEL_MMA > peel) { \ Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \ - ploadRhsMMA(rhs_ptr + (accRows * peel), rhsV##peel); \ - MICRO_MMA_UNROLL(func2); \ - func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) \ - func(4,type,peel) func(5,type,peel) func(6,type,peel) func(7,type,peel) \ - } else { \ - EIGEN_UNUSED_VARIABLE(rhsV##peel); \ + ploadRhsMMA(rhs_ptr + (accRows * peel), rhsV[peel]); \ + MICRO_MMA_UNROLL(funcl) \ + MICRO_MMA_WORK(funcw, type, peel) \ } -#define MICRO_MMA_UNROLL_TYPE_PEEL(func, func2, type) \ - type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7, rhsV8, rhsV9; \ - MICRO_MMA_TYPE_PEEL(func,func2,type,0); MICRO_MMA_TYPE_PEEL(func,func2,type,1); \ - MICRO_MMA_TYPE_PEEL(func,func2,type,2); MICRO_MMA_TYPE_PEEL(func,func2,type,3); \ - MICRO_MMA_TYPE_PEEL(func,func2,type,4); MICRO_MMA_TYPE_PEEL(func,func2,type,5); \ - MICRO_MMA_TYPE_PEEL(func,func2,type,6); MICRO_MMA_TYPE_PEEL(func,func2,type,7); \ - MICRO_MMA_TYPE_PEEL(func,func2,type,8); MICRO_MMA_TYPE_PEEL(func,func2,type,9); - -#define MICRO_MMA_UNROLL_TYPE_ONE(func, func2, type) \ - type rhsV0; \ - MICRO_MMA_TYPE_PEEL(func,func2,type,0); - -#define MICRO_MMA_ONE_PEEL \ - if (sizeof(Scalar) == sizeof(float)) { \ - MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \ +#ifndef VECTOR_PAIR_LOADS_LHS +#define MICRO_MMA_UNROLL_TYPE_PEEL(funcw, funcl, type) \ + type rhsV[8]; \ + MICRO_MMA_TYPE_PEEL(funcw,funcl,type,0) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,1) \ + MICRO_MMA_TYPE_PEEL(funcw,funcl,type,2) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,3) \ + MICRO_MMA_TYPE_PEEL(funcw,funcl,type,4) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,5) \ + MICRO_MMA_TYPE_PEEL(funcw,funcl,type,6) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,7) +#else +#define MICRO_MMA_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type, peel1, peel2) \ + if (PEEL_MMA > 
peel2) { \ + PacketBlock lhsV20, lhsV21, lhsV22, lhsV23, lhsV24, lhsV25, lhsV26, lhsV27; \ + __vector_pair plhsV0, plhsV1, plhsV2, plhsV3, plhsV4, plhsV5, plhsV6, plhsV7; \ + if (sizeof(type) == 16) { \ + ploadRhsMMA(reinterpret_cast(rhs_ptr + (accRows * peel1)), prhsV##peel1); \ + __builtin_vsx_disassemble_pair(reinterpret_cast(&rhsV[peel1]), &prhsV##peel1); \ + } else { \ + EIGEN_UNUSED_VARIABLE(prhsV##peel1); \ + ploadRhsMMA(rhs_ptr + (accRows * peel1), rhsV[peel1]); \ + ploadRhsMMA(rhs_ptr + (accRows * peel2), rhsV[peel2]); \ + } \ + MICRO_MMA_UNROLL(funcl2) \ + MICRO_MMA_WORK(funcw2, type, peel1) \ + MICRO_MMA_WORK(funcw2, type, peel2) \ } else { \ - MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \ - } \ - rhs_ptr += (accRows * PEEL_MMA); + EIGEN_UNUSED_VARIABLE(prhsV##peel1); \ + MICRO_MMA_TYPE_PEEL(funcw1, funcl1, type, peel1) \ + } -#define MICRO_MMA_ONE \ - if (sizeof(Scalar) == sizeof(float)) { \ - MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \ - } else { \ - MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \ - } \ - rhs_ptr += accRows; +#define MICRO_MMA_UNROLL_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type) \ + type rhsV[8]; \ + __vector_pair prhsV0, prhsV2, prhsV4, prhsV6; \ + MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,0,1) \ + MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,2,3) \ + MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,4,5) \ + MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,6,7) +#endif + +#define MICRO_MMA_UNROLL_TYPE_ONE(funcw, funcl, type) \ + type rhsV[1]; \ + MICRO_MMA_TYPE_PEEL(funcw,funcl,type,0) + +#define MICRO_MMA_UNROLL_TYPE(MICRO_MMA_TYPE, size) \ + MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, RhsPacket) \ + rhs_ptr += (accRows * size); + +#ifndef VECTOR_PAIR_LOADS_LHS +#define MICRO_MMA_ONE_PEEL MICRO_MMA_UNROLL_TYPE(MICRO_MMA_UNROLL_TYPE_PEEL, PEEL_MMA) +#else +#define MICRO_MMA_UNROLL_TYPE2(MICRO_MMA_TYPE, size) \ + MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, MICRO_MMA_WORK_TWO, MICRO_MMA_LOAD_TWO, RhsPacket) \ + rhs_ptr += (accRows * size); + +#define MICRO_MMA_ONE_PEEL MICRO_MMA_UNROLL_TYPE2(MICRO_MMA_UNROLL_TYPE_PEEL2, PEEL_MMA) +#endif + +#define MICRO_MMA_ONE MICRO_MMA_UNROLL_TYPE(MICRO_MMA_UNROLL_TYPE_ONE, 1) #define MICRO_MMA_DST_PTR_ONE(iter) \ if (unroll_factor > iter) { \ - bsetzeroMMA(&accZero##iter); \ + bsetzeroMMA(&accZero##iter); \ } else { \ EIGEN_UNUSED_VARIABLE(accZero##iter); \ } #define MICRO_MMA_DST_PTR MICRO_MMA_UNROLL(MICRO_MMA_DST_PTR_ONE) -#define MICRO_MMA_SRC_PTR_ONE(iter) \ - if (unroll_factor > iter) { \ - lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols + accCols*offsetA; \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \ - } +#define MICRO_MMA_SRC_PTR MICRO_MMA_UNROLL(MICRO_SRC_PTR_ONE) -#define MICRO_MMA_SRC_PTR MICRO_MMA_UNROLL(MICRO_MMA_SRC_PTR_ONE) - -#define MICRO_MMA_PREFETCH_ONE(iter) \ - if (unroll_factor > iter) { \ - EIGEN_POWER_PREFETCH(lhs_ptr##iter); \ - } - -#define MICRO_MMA_PREFETCH MICRO_MMA_UNROLL(MICRO_MMA_PREFETCH_ONE) +#define MICRO_MMA_PREFETCH MICRO_MMA_UNROLL(MICRO_PREFETCH_ONE) +#ifdef USE_PARTIAL_PACKETS #define MICRO_MMA_STORE_ONE(iter) \ if (unroll_factor > iter) { \ - storeAccumulator(row + iter*accCols, col, res, pAlpha, &accZero##iter); \ + storeAccumulator(row + iter*accCols, res, pAlpha, accCols2, &accZero##iter); \ } +#else +#define MICRO_MMA_STORE_ONE(iter) \ + if (unroll_factor > iter) { \ + storeAccumulator(row + 
iter*accCols, res, pAlpha, pMask, &accZero##iter); \ + } +#endif #define MICRO_MMA_STORE MICRO_MMA_UNROLL(MICRO_MMA_STORE_ONE) -template -EIGEN_STRONG_INLINE void gemm_unrolled_MMA_iteration( +#ifdef USE_PARTIAL_PACKETS +template +#else +template +#endif +EIGEN_ALWAYS_INLINE void gemm_unrolled_MMA_iteration( const DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base, @@ -252,8 +315,13 @@ EIGEN_STRONG_INLINE void gemm_unrolled_MMA_iteration( Index strideA, Index offsetA, Index& row, - Index col, - const Packet& pAlpha) + const Packet& pAlpha, +#ifdef USE_PARTIAL_PACKETS + Index accCols2 +#else + const Packet& pMask +#endif + ) { const Scalar* rhs_ptr = rhs_base; const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL; @@ -262,8 +330,8 @@ EIGEN_STRONG_INLINE void gemm_unrolled_MMA_iteration( MICRO_MMA_SRC_PTR MICRO_MMA_DST_PTR - Index k = 0; - for(; k + PEEL_MMA <= depth; k+= PEEL_MMA) + Index k = 0, depth2 = depth - PEEL_MMA; + for(; k <= depth2; k += PEEL_MMA) { EIGEN_POWER_PREFETCH(rhs_ptr); MICRO_MMA_PREFETCH @@ -275,181 +343,245 @@ EIGEN_STRONG_INLINE void gemm_unrolled_MMA_iteration( } MICRO_MMA_STORE - row += unroll_factor*accCols; + MICRO_UPDATE } -template +#ifdef USE_PARTIAL_PACKETS +#define MICRO_MMA_UNROLL_ITER2(N, M) \ + gemm_unrolled_MMA_iteration(res3, lhs_base, rhs_base, depth, strideA, offsetA, row, pAlpha, M ? remaining_rows : accCols); \ + if (M) return; +#else +#define MICRO_MMA_UNROLL_ITER2(N, M) \ + gemm_unrolled_MMA_iteration(res3, lhs_base, rhs_base, depth, strideA, offsetA, row, pAlpha, pMask); \ + if (M) return; +#endif + +template +EIGEN_ALWAYS_INLINE void gemmMMA_cols( + const DataMapper& res, + const Scalar* blockA, + const Scalar* blockB, + Index depth, + Index strideA, + Index offsetA, + Index strideB, + Index offsetB, + Index col, + Index rows, + Index remaining_rows, + const Packet& pAlpha, + const Packet& pMask) +{ + const DataMapper res3 = res.getSubMapper(0, col); + + const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB; + const Scalar* lhs_base = blockA + accCols*offsetA; + Index row = 0; + +#define MAX_MMA_UNROLL 7 + while(row + MAX_MMA_UNROLL*accCols <= rows) { + MICRO_MMA_UNROLL_ITER2(MAX_MMA_UNROLL, 0); + } + switch( (rows-row)/accCols ) { +#if MAX_MMA_UNROLL > 7 + case 7: + MICRO_UNROLL_ITER(MICRO_MMA_UNROLL_ITER2, 7) + break; +#endif +#if MAX_MMA_UNROLL > 6 + case 6: + MICRO_UNROLL_ITER(MICRO_MMA_UNROLL_ITER2, 6) + break; +#endif +#if MAX_MMA_UNROLL > 5 + case 5: + MICRO_UNROLL_ITER(MICRO_MMA_UNROLL_ITER2, 5) + break; +#endif +#if MAX_MMA_UNROLL > 4 + case 4: + MICRO_UNROLL_ITER(MICRO_MMA_UNROLL_ITER2, 4) + break; +#endif +#if MAX_MMA_UNROLL > 3 + case 3: + MICRO_UNROLL_ITER(MICRO_MMA_UNROLL_ITER2, 3) + break; +#endif +#if MAX_MMA_UNROLL > 2 + case 2: + MICRO_UNROLL_ITER(MICRO_MMA_UNROLL_ITER2, 2) + break; +#endif +#if MAX_MMA_UNROLL > 1 + case 1: + MICRO_UNROLL_ITER(MICRO_MMA_UNROLL_ITER2, 1) + break; +#endif + default: + break; + } +#undef MAX_MMA_UNROLL + + if(remaining_rows > 0) + { + gemm_extra_row(res3, blockA, rhs_base, depth, strideA, offsetA, strideB, row, rows, remaining_rows, pAlpha, pMask); + } +} + +template void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB, Index rows, Index depth, Index cols, Scalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB) { const Index remaining_rows = rows % accCols; - const Index remaining_cols = cols % accRows; if( strideA == -1 ) 
strideA = depth; if( strideB == -1 ) strideB = depth; const Packet pAlpha = pset1(alpha); - const Packet pMask = bmask((const int)(remaining_rows)); + const Packet pMask = bmask(remaining_rows); + + typedef typename std::conditional_t<(sizeof(Scalar) == sizeof(float)), RhsPacket, __vector_pair> RhsPacket2; Index col = 0; for(; col + accRows <= cols; col += accRows) { - const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB; - const Scalar* lhs_base = blockA; - - Index row = 0; -#define MAX_MMA_UNROLL 7 - while(row + MAX_MMA_UNROLL*accCols <= rows) { - gemm_unrolled_MMA_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - } - switch( (rows-row)/accCols ) { -#if MAX_MMA_UNROLL > 7 - case 7: - gemm_unrolled_MMA_iteration<7, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_MMA_UNROLL > 6 - case 6: - gemm_unrolled_MMA_iteration<6, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_MMA_UNROLL > 5 - case 5: - gemm_unrolled_MMA_iteration<5, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_MMA_UNROLL > 4 - case 4: - gemm_unrolled_MMA_iteration<4, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_MMA_UNROLL > 3 - case 3: - gemm_unrolled_MMA_iteration<3, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_MMA_UNROLL > 2 - case 2: - gemm_unrolled_MMA_iteration<2, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif -#if MAX_MMA_UNROLL > 1 - case 1: - gemm_unrolled_MMA_iteration<1, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha); - break; -#endif - default: - break; - } -#undef MAX_MMA_UNROLL - - if(remaining_rows > 0) - { - gemm_extra_row(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask); - } + gemmMMA_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, remaining_rows, pAlpha, pMask); } - if(remaining_cols > 0) + if (col != cols) { - const Scalar* rhs_base = blockB + col*strideB + remaining_cols*offsetB; - const Scalar* lhs_base = blockA; - - for(; col < cols; col++) - { - Index row = 0; - - gemm_unrolled_col(res, lhs_base, rhs_base, depth, strideA, offsetA, row, rows, col, remaining_cols, pAlpha); - - if (remaining_rows > 0) - { - gemm_extra_col(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_rows, remaining_cols, pAlpha); - } - rhs_base++; - } + gemm_extra_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask); } } -#define accColsC (accCols / 2) #define advanceRows ((LhsIsReal) ? 1 : 2) #define advanceCols ((RhsIsReal) ? 1 : 2) // PEEL_COMPLEX_MMA loop factor. 
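
The gemmMMA_cols routine above walks full blocks with the maximally unrolled template instantiation, then a switch on the leftover block count jumps to a smaller instantiation; the constant "unroll_factor > iter" guards inside the MICRO_* macros let the compiler discard unused steps. A simplified, non-Eigen sketch of that unroll-and-switch dispatch:

#include <cstddef>

// Each step is guarded by a compile-time test, so unused ones compile away.
template<int unroll>
void rows_kernel(float* dst, const float* src)
{
  if (unroll > 0) dst[0] = src[0] * 2.0f;
  if (unroll > 1) dst[1] = src[1] * 2.0f;
  if (unroll > 2) dst[2] = src[2] * 2.0f;
  if (unroll > 3) dst[3] = src[3] * 2.0f;
}

void scale_rows(float* dst, const float* src, std::size_t rows)
{
  std::size_t r = 0;
  for (; r + 4 <= rows; r += 4) rows_kernel<4>(dst + r, src + r);
  switch (rows - r) {              // remainder picks a smaller instantiation
    case 3: rows_kernel<3>(dst + r, src + r); break;
    case 2: rows_kernel<2>(dst + r, src + r); break;
    case 1: rows_kernel<1>(dst + r, src + r); break;
    default: break;
  }
}
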
-#define PEEL_COMPLEX_MMA 7 +#define PEEL_COMPLEX_MMA 3 #define MICRO_COMPLEX_MMA_UNROLL(func) \ - func(0) func(1) func(2) func(3) func(4) + func(0) func(1) func(2) func(3) -#define MICRO_COMPLEX_MMA_LOAD_ONE(iter) \ - if (unroll_factor > iter) { \ - lhsV##iter = ploadLhs(lhs_ptr_real##iter); \ - lhs_ptr_real##iter += accCols; \ - if(!LhsIsReal) { \ - lhsVi##iter = ploadLhs(lhs_ptr_imag##iter); \ - lhs_ptr_imag##iter += accCols; \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhsVi##iter); \ - } \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhsV##iter); \ - EIGEN_UNUSED_VARIABLE(lhsVi##iter); \ - } +#define MICRO_COMPLEX_MMA_WORK(func, type, peel) \ + func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) #define MICRO_COMPLEX_MMA_WORK_ONE(iter, type, peel) \ if (unroll_factor > iter) { \ - pgercMMA(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \ + pgercMMA(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV[peel], rhsVi[peel]); \ } -#define MICRO_COMPLEX_MMA_TYPE_PEEL(func, func2, type, peel) \ - if (PEEL_COMPLEX_MMA > peel) { \ - Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \ - Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \ - ploadRhsMMA(rhs_ptr_real + (accRows * peel), rhsV##peel); \ - if(!RhsIsReal) { \ - ploadRhsMMA(rhs_ptr_imag + (accRows * peel), rhsVi##peel); \ +#ifdef VECTOR_PAIR_LOADS_LHS +#define MICRO_COMPLEX_MMA_WORK_TWO(iter, type, peel) \ + if (unroll_factor > iter) { \ + pgercMMA(&accReal##iter, &accImag##iter, lhsV2##iter.packet[peel & 1], lhsVi2##iter.packet[peel & 1], rhsV[peel], rhsVi[peel]); \ + } + +#define MICRO_COMPLEX_MMA_LOAD1_TWO(lhs_ptr, iter) \ + if (!LhsIsReal && (unroll_factor > iter)) { \ + if (MICRO_NORMAL(iter)) { \ + ploadLhsMMA(reinterpret_cast(lhs_ptr_real##iter + imag_delta), plhsVi##iter); \ + __builtin_vsx_disassemble_pair(reinterpret_cast(&lhsVi2##iter.packet), &plhsVi##iter); \ } else { \ - EIGEN_UNUSED_VARIABLE(rhsVi##peel); \ + lhsVi2##iter.packet[0] = ploadLhs(lhs_ptr_real##iter + imag_delta2); \ + lhsVi2##iter.packet[1] = ploadLhs(lhs_ptr_real##iter + imag_delta2 + accCols2); \ + EIGEN_UNUSED_VARIABLE(plhsVi##iter) \ } \ - MICRO_COMPLEX_MMA_UNROLL(func2); \ - func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) func(4,type,peel) \ } else { \ - EIGEN_UNUSED_VARIABLE(rhsV##peel); \ - EIGEN_UNUSED_VARIABLE(rhsVi##peel); \ + EIGEN_UNUSED_VARIABLE(lhsVi2##iter); \ + EIGEN_UNUSED_VARIABLE(plhsVi##iter) \ + } \ + MICRO_MMA_LOAD1_TWO(lhs_ptr_real, iter) + +#define MICRO_COMPLEX_MMA_LOAD_TWO(iter) MICRO_COMPLEX_MMA_LOAD1_TWO(lhs_ptr, iter) +#endif + +#define MICRO_COMPLEX_MMA_TYPE_PEEL(funcw, funcl, type, peel) \ + if (PEEL_COMPLEX_MMA > peel) { \ + Packet lhsV0, lhsV1, lhsV2, lhsV3; \ + Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3; \ + ploadRhsMMA(rhs_ptr_real + (accRows * peel), rhsV[peel]); \ + if(!RhsIsReal) { \ + ploadRhsMMA(rhs_ptr_imag + (accRows * peel), rhsVi[peel]); \ + } \ + MICRO_COMPLEX_MMA_UNROLL(funcl) \ + MICRO_COMPLEX_MMA_WORK(funcw, type, peel) \ } -#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(func, func2, type) \ - type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7, rhsV8, rhsV9; \ - type rhsVi0, rhsVi1, rhsVi2, rhsVi3, rhsVi4, rhsVi5, rhsVi6, rhsVi7, rhsVi8, rhsVi9; \ - MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,1); \ - MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,2); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,3); \ - MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,4); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,5); \ - 
MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,6); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,7); \ - MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,8); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,9); - -#define MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(func, func2, type) \ - type rhsV0, rhsVi0; \ - MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0); - -#define MICRO_COMPLEX_MMA_ONE_PEEL \ - if (sizeof(Scalar) == sizeof(float)) { \ - MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \ +#ifndef VECTOR_PAIR_LOADS_LHS +#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(funcw, funcl, type) \ + type rhsV[4], rhsVi[4]; \ + MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,0) MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,1) \ + MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,2) MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,3) +#else +#define MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type, peel1, peel2) \ + if (PEEL_COMPLEX_MMA > peel2) { \ + PacketBlock lhsV20, lhsV21, lhsV22, lhsV23; \ + PacketBlock lhsVi20, lhsVi21, lhsVi22, lhsVi23; \ + __vector_pair plhsV0, plhsV1, plhsV2, plhsV3; \ + __vector_pair plhsVi0, plhsVi1, plhsVi2, plhsVi3; \ + if (sizeof(type) == 16) { \ + ploadRhsMMA(reinterpret_cast(rhs_ptr_real + (accRows * peel1)), prhsV##peel1); \ + __builtin_vsx_disassemble_pair(reinterpret_cast(&rhsV[peel1]), &prhsV##peel1); \ + if(!RhsIsReal) { \ + ploadRhsMMA(reinterpret_cast(rhs_ptr_imag + (accRows * peel1)), prhsVi##peel1); \ + __builtin_vsx_disassemble_pair(reinterpret_cast(&rhsVi[peel1]), &prhsVi##peel1); \ + } else { \ + EIGEN_UNUSED_VARIABLE(prhsVi##peel1); \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(prhsV##peel1); \ + EIGEN_UNUSED_VARIABLE(prhsVi##peel1); \ + ploadRhsMMA(rhs_ptr_real + (accRows * peel1), rhsV[peel1]); \ + ploadRhsMMA(rhs_ptr_real + (accRows * peel2), rhsV[peel2]); \ + if(!RhsIsReal) { \ + ploadRhsMMA(rhs_ptr_imag + (accRows * peel1), rhsVi[peel1]); \ + ploadRhsMMA(rhs_ptr_imag + (accRows * peel2), rhsVi[peel2]); \ + } \ + } \ + MICRO_COMPLEX_MMA_UNROLL(funcl2) \ + MICRO_COMPLEX_MMA_WORK(funcw2, type, peel1) \ + MICRO_COMPLEX_MMA_WORK(funcw2, type, peel2) \ } else { \ - MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \ - } \ - rhs_ptr_real += (accRows * PEEL_COMPLEX_MMA); \ - if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX_MMA); + EIGEN_UNUSED_VARIABLE(prhsV##peel1); \ + EIGEN_UNUSED_VARIABLE(prhsVi##peel1); \ + MICRO_COMPLEX_MMA_TYPE_PEEL(funcw1, funcl1, type, peel1) \ + } -#define MICRO_COMPLEX_MMA_ONE \ - if (sizeof(Scalar) == sizeof(float)) { \ - MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \ - } else { \ - MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \ - } \ - rhs_ptr_real += accRows; \ - if(!RhsIsReal) rhs_ptr_imag += accRows; +#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type) \ + type rhsV[4], rhsVi[4]; \ + __vector_pair prhsV0, prhsV2; \ + __vector_pair prhsVi0, prhsVi2; \ + MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,0,1) \ + MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,2,3) +#endif + +#define MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(funcw, funcl, type) \ + type rhsV[1], rhsVi[1]; \ + MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,0) + +#define MICRO_COMPLEX_MMA_UNROLL_TYPE(MICRO_COMPLEX_MMA_TYPE, size) \ + MICRO_COMPLEX_MMA_TYPE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_LOAD_ONE, 
RhsPacket) \ + rhs_ptr_real += (accRows * size); \ + if(!RhsIsReal) rhs_ptr_imag += (accRows * size); + +#ifndef VECTOR_PAIR_LOADS_LHS +#define MICRO_COMPLEX_MMA_ONE_PEEL MICRO_COMPLEX_MMA_UNROLL_TYPE(MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL, PEEL_COMPLEX_MMA) +#else +#define MICRO_COMPLEX_MMA_UNROLL_TYPE2(MICRO_COMPLEX_MMA_TYPE, size) \ + MICRO_COMPLEX_MMA_TYPE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_LOAD_ONE, MICRO_COMPLEX_MMA_WORK_TWO, MICRO_COMPLEX_MMA_LOAD_TWO, RhsPacket) \ + rhs_ptr_real += (accRows * size); \ + if(!RhsIsReal) rhs_ptr_imag += (accRows * size); + +#define MICRO_COMPLEX_MMA_ONE_PEEL MICRO_COMPLEX_MMA_UNROLL_TYPE2(MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL2, PEEL_COMPLEX_MMA) +#endif + +#define MICRO_COMPLEX_MMA_ONE MICRO_COMPLEX_MMA_UNROLL_TYPE(MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE, 1) #define MICRO_COMPLEX_MMA_DST_PTR_ONE(iter) \ if (unroll_factor > iter) { \ - bsetzeroMMA(&accReal##iter); \ - bsetzeroMMA(&accImag##iter); \ + bsetzeroMMA(&accReal##iter); \ + bsetzeroMMA(&accImag##iter); \ } else { \ EIGEN_UNUSED_VARIABLE(accReal##iter); \ EIGEN_UNUSED_VARIABLE(accImag##iter); \ @@ -457,40 +589,19 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB, #define MICRO_COMPLEX_MMA_DST_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_DST_PTR_ONE) -#define MICRO_COMPLEX_MMA_SRC_PTR_ONE(iter) \ - if (unroll_factor > iter) { \ - lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols + accCols*offsetA; \ - if(!LhsIsReal) { \ - lhs_ptr_imag##iter = lhs_ptr_real##iter + accCols*strideA; \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \ - } \ - } else { \ - EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \ - EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \ - } +#define MICRO_COMPLEX_MMA_SRC_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_SRC_PTR_ONE) -#define MICRO_COMPLEX_MMA_SRC_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_SRC_PTR_ONE) - -#define MICRO_COMPLEX_MMA_PREFETCH_ONE(iter) \ - if (unroll_factor > iter) { \ - EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \ - if(!LhsIsReal) { \ - EIGEN_POWER_PREFETCH(lhs_ptr_imag##iter); \ - } \ - } - -#define MICRO_COMPLEX_MMA_PREFETCH MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_PREFETCH_ONE) +#define MICRO_COMPLEX_MMA_PREFETCH MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_PREFETCH_ONE) #define MICRO_COMPLEX_MMA_STORE_ONE(iter) \ if (unroll_factor > iter) { \ - storeComplexAccumulator(row + iter*accCols, col, res, pAlphaReal, pAlphaImag, &accReal##iter, &accImag##iter); \ + storeComplexAccumulator(row + iter*accCols, res, pAlphaReal, pAlphaImag, pMask, &accReal##iter, &accImag##iter); \ } #define MICRO_COMPLEX_MMA_STORE MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_STORE_ONE) -template -EIGEN_STRONG_INLINE void gemm_complex_unrolled_MMA_iteration( +template +EIGEN_ALWAYS_INLINE void gemm_complex_unrolled_MMA_iteration( const DataMapper& res, const Scalar* lhs_base, const Scalar* rhs_base, @@ -499,27 +610,28 @@ EIGEN_STRONG_INLINE void gemm_complex_unrolled_MMA_iteration( Index offsetA, Index strideB, Index& row, - Index col, const Packet& pAlphaReal, - const Packet& pAlphaImag) + const Packet& pAlphaImag, + const Packet& pMask) { const Scalar* rhs_ptr_real = rhs_base; - const Scalar* rhs_ptr_imag; + const Scalar* rhs_ptr_imag = NULL; + const Index imag_delta = accCols*strideA; + const Index imag_delta2 = accCols2*strideA; if(!RhsIsReal) { rhs_ptr_imag = rhs_base + accRows*strideB; } else { EIGEN_UNUSED_VARIABLE(rhs_ptr_imag); } - const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_imag0 = NULL, * 
lhs_ptr_real1 = NULL, * lhs_ptr_imag1 = NULL; - const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_imag2 = NULL, * lhs_ptr_real3 = NULL, * lhs_ptr_imag3 = NULL; - const Scalar* lhs_ptr_real4 = NULL, * lhs_ptr_imag4 = NULL; - __vector_quad accReal0, accImag0, accReal1, accImag1, accReal2, accImag2, accReal3, accImag3, accReal4, accImag4; + const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_real1 = NULL; + const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_real3 = NULL; + __vector_quad accReal0, accImag0, accReal1, accImag1, accReal2, accImag2, accReal3, accImag3; MICRO_COMPLEX_MMA_SRC_PTR MICRO_COMPLEX_MMA_DST_PTR - Index k = 0; - for(; k + PEEL_COMPLEX_MMA <= depth; k+= PEEL_COMPLEX_MMA) + Index k = 0, depth2 = depth - PEEL_COMPLEX_MMA; + for(; k <= depth2; k += PEEL_COMPLEX_MMA) { EIGEN_POWER_PREFETCH(rhs_ptr_real); if(!RhsIsReal) { @@ -534,85 +646,98 @@ EIGEN_STRONG_INLINE void gemm_complex_unrolled_MMA_iteration( } MICRO_COMPLEX_MMA_STORE - row += unroll_factor*accCols; + MICRO_COMPLEX_UPDATE } -template +#define MICRO_COMPLEX_MMA_UNROLL_ITER2(N, M) \ + gemm_complex_unrolled_MMA_iteration(res3, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, pAlphaReal, pAlphaImag, pMask); \ + if (M) return; + +template +EIGEN_ALWAYS_INLINE void gemmMMA_complex_cols( + const DataMapper& res, + const Scalar* blockA, + const Scalar* blockB, + Index depth, + Index strideA, + Index offsetA, + Index strideB, + Index offsetB, + Index col, + Index rows, + Index remaining_rows, + const Packet& pAlphaReal, + const Packet& pAlphaImag, + const Packet& pMask) +{ + const DataMapper res3 = res.getSubMapper(0, col); + + const Scalar* rhs_base = blockB + advanceCols*col*strideB + accRows*offsetB; + const Scalar* lhs_base = blockA + accCols*offsetA; + Index row = 0; + +#define MAX_COMPLEX_MMA_UNROLL 4 + while(row + MAX_COMPLEX_MMA_UNROLL*accCols <= rows) { + MICRO_COMPLEX_MMA_UNROLL_ITER2(MAX_COMPLEX_MMA_UNROLL, 0); + } + switch( (rows-row)/accCols ) { +#if MAX_COMPLEX_MMA_UNROLL > 4 + case 4: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 4) + break; +#endif +#if MAX_COMPLEX_MMA_UNROLL > 3 + case 3: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 3) + break; +#endif +#if MAX_COMPLEX_MMA_UNROLL > 2 + case 2: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 2) + break; +#endif +#if MAX_COMPLEX_MMA_UNROLL > 1 + case 1: + MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 1) + break; +#endif + default: + break; + } +#undef MAX_COMPLEX_MMA_UNROLL + + if(remaining_rows > 0) + { + gemm_complex_extra_row(res3, blockA, rhs_base, depth, strideA, offsetA, strideB, row, rows, remaining_rows, pAlphaReal, pAlphaImag, pMask); + } +} + +template void gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc, const RhsScalar* blockBc, Index rows, Index depth, Index cols, Scalarc alpha, Index strideA, Index strideB, Index offsetA, Index offsetB) { const Index remaining_rows = rows % accCols; - const Index remaining_cols = cols % accRows; if( strideA == -1 ) strideA = depth; if( strideB == -1 ) strideB = depth; const Packet pAlphaReal = pset1(alpha.real()); const Packet pAlphaImag = pset1(alpha.imag()); - const Packet pMask = bmask((const int)(remaining_rows)); + const Packet pMask = bmask(remaining_rows); const Scalar* blockA = (Scalar *) blockAc; const Scalar* blockB = (Scalar *) blockBc; + typedef typename std::conditional_t<(sizeof(Scalar) == sizeof(float)), RhsPacket, __vector_pair> RhsPacket2; + Index col = 0; for(; col + accRows <= cols; col += accRows) { - const Scalar* rhs_base = 
blockB + advanceCols*col*strideB + accRows*offsetB; - const Scalar* lhs_base = blockA; - Index row = 0; - -#define MAX_COMPLEX_MMA_UNROLL 4 - while(row + MAX_COMPLEX_MMA_UNROLL*accCols <= rows) { - gemm_complex_unrolled_MMA_iteration(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - } - switch( (rows-row)/accCols ) { -#if MAX_COMPLEX_MMA_UNROLL > 4 - case 4: - gemm_complex_unrolled_MMA_iteration<4, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif -#if MAX_COMPLEX_MMA_UNROLL > 3 - case 3: - gemm_complex_unrolled_MMA_iteration<3, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif -#if MAX_COMPLEX_MMA_UNROLL > 2 - case 2: - gemm_complex_unrolled_MMA_iteration<2, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif -#if MAX_COMPLEX_MMA_UNROLL > 1 - case 1: - gemm_complex_unrolled_MMA_iteration<1, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag); - break; -#endif - default: - break; - } -#undef MAX_COMPLEX_MMA_UNROLL - - if(remaining_rows > 0) - { - gemm_complex_extra_row(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask); - } + gemmMMA_complex_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, remaining_rows, pAlphaReal, pAlphaImag, pMask); } - if(remaining_cols > 0) + if (col != cols) { - const Scalar* rhs_base = blockB + advanceCols*col*strideB + remaining_cols*offsetB; - const Scalar* lhs_base = blockA; - - for(; col < cols; col++) - { - Index row = 0; - - gemm_complex_unrolled_col(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, rows, col, remaining_cols, pAlphaReal, pAlphaImag); - - if (remaining_rows > 0) - { - gemm_complex_extra_col(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_rows, remaining_cols, pAlphaReal, pAlphaImag); - } - rhs_base++; - } + gemm_complex_extra_cols(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask); } } @@ -620,10 +745,13 @@ void gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc, const RhsS #undef advanceRows #undef advanceCols -#pragma GCC reset_options } // end namespace internal } // end namespace Eigen +#if defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) +#pragma GCC pop_options +#endif + #endif // EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h new file mode 100644 index 0000000..b3e063d --- /dev/null +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h @@ -0,0 +1,385 @@ + #ifndef EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H + #define EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H + +namespace Eigen { + +namespace internal { + 
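// ---- Illustrative sketch (editorial, not part of the patch) ----
// The kernels in this file are built around the POWER10 MMA bfloat16
// outer-product instructions. A minimal sketch of the accumulate/extract
// pattern they rely on, assuming GCC with -mcpu=power10; the
// __builtin_mma_* calls are real intrinsics, while the sketch_* names are
// hypothetical helpers used only for illustration:
typedef __vector unsigned char sketch_vec_t;

// acc += a (4x2 bfloat16) * b (2x4 bfloat16), accumulated in 4x4 float32.
static inline void sketch_bf16_ger_step(__vector_quad* acc, sketch_vec_t a, sketch_vec_t b)
{
  __builtin_mma_xvbf16ger2pp(acc, a, b);
}

// Zero an accumulator, apply one update, then copy its four float32 rows
// out. The pmxvbf16ger2pp variant used by pgerMMAbfloat16 below performs
// the same update under x/y mask prefixes so edge blocks can be partial.
static inline void sketch_bf16_block(float out[4][4], sketch_vec_t a, sketch_vec_t b)
{
  __vector_quad acc;
  __builtin_mma_xxsetaccz(&acc);
  sketch_bf16_ger_step(&acc, a, b);
  __builtin_mma_disassemble_acc(out, &acc);
}
// ----------------------------------------------------------------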
+EIGEN_STRONG_INLINE void pgerMMAbfloat16(__vector_quad* acc, const Packet8bf& a, const Packet8bf& b, int maskX, int maskY) +{ + switch(maskX){ + case 15: + switch(maskY){ + case 0b1111: + __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val)); + break; + case 0b0011: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b1111, 0b11, 0b11); + break; + case 0b0001: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b1111, 0b1, 0b11); + break; + case 0b0111: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b1111, 0b111, 0b11); + break; + } + break; + case 3: + switch(maskY){ + case 0b1111: + __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val)); + break; + case 0b0011: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b11, 0b11, 0b11); + break; + case 0b0001: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b11, 0b1, 0b11); + break; + case 0b0111: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b11, 0b111, 0b11); + break; + } + break; + case 1: + switch(maskY){ + case 0b1111: + __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val)); + break; + case 0b0011: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b1, 0b11, 0b11); + break; + case 0b0001: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b1, 0b1, 0b11); + break; + case 0b0111: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b1, 0b111, 0b11); + break; + } + break; + case 0b0111: + switch(maskY){ + case 0b1111: + __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val)); + break; + case 0b0011: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b111, 0b11, 0b11); + break; + case 0b0001: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b111, 0b1, 0b11); + break; + case 0b0111: + __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast(a.m_val), reinterpret_cast(b.m_val), 0b111, 0b111, 0b11); + break; + } + break; + } +} + +EIGEN_STRONG_INLINE void scaleAndStore(float* result, float* acc, Packet4f pAlpha) +{ + Packet4f result_block = ploadu(result); + Packet4f packet_pmadd = pmadd(pload(acc), pAlpha, result_block); + pstoreu(result, packet_pmadd); +} + +template +EIGEN_STRONG_INLINE Packet8bf loadLhsBfloat16(const bfloat16* indexA) +{ + Packet8bf lhs1 = ploadu(indexA); + Packet8bf lhs2; + const int packet_size = 8; //We fit 8 bfloat16 on a 128 register + if(zero){ + lhs2 = pset1(Eigen::bfloat16(0)); + } + else lhs2 = ploadu(indexA + num_packets*packet_size); + return vec_mergeh(lhs1.m_val, lhs2.m_val); +} + +template +EIGEN_STRONG_INLINE Packet8bf loadLhsBfloat16ExtraRows(const bfloat16* indexA, Index strideA, Index row, int extra_rows) +{ + EIGEN_ALIGN16 bfloat16 lhs_array[8] = {Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0)}; + int count = 0; + const bfloat16* idxA = indexA + row*strideA; + for(int row_count = 0; row_count < extra_rows; row_count++){ + lhs_array[count++] = *idxA; + if(!zero) lhs_array[count] = *(idxA+1); + count++; + idxA += strideA; + } + return 
pload(lhs_array); +} + +template +EIGEN_STRONG_INLINE Packet8bf loadRhsBfloat16(const bfloat16* baseB, Index strideB, int i, int k) +{ + const bfloat16* indexB = baseB + strideB*4*i + (k*4); + Packet8bf rhs1 = ploadu(indexB); + if(zero){ + Packet8bf rhs2 = pset1(Eigen::bfloat16(0)); + return vec_mergeh(rhs1.m_val, rhs2.m_val); + } + //r = vec_perm (a, b, c) + //Let v be the concatenation of a and b. + //Each byte of r selected by using the least-significant 5 bits of the corresponding byte of c as an index into v + //We need this elements from rhs: 0, 4, 1, 5, 2, 6, 3, 7 + Packet16uc c = {0x0u, 0x1u, 0x8u, 0x9u, 0x2u, 0x3u, 0xAu, 0xB, 0x4, 0x5, 0xCu, 0xDu, 0x6u, 0x7u, 0xEu, 0xFu}; + return vec_perm(rhs1.m_val, rhs1.m_val, c); +} + +template +EIGEN_STRONG_INLINE Packet8bf loadRhsBfloat16ExtraCols(const bfloat16* blockB, Index strideB, Index offsetB, Index col, int i, int k, int extra_cols) +{ + EIGEN_ALIGN16 bfloat16 rhs_vector[8] = {Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0)}; + const bfloat16* indexB = blockB + ((col+4*i)*strideB)+k+offsetB; + for(int c = 0; c < extra_cols; c++){ + rhs_vector[2*c] = *indexB; + if(!zero) rhs_vector[2*c+1] = *(indexB+1); + indexB += strideB; + } + return pload(rhs_vector); +} + +template +EIGEN_STRONG_INLINE void KLoop +( + const bfloat16* indexA, + const bfloat16* indexB, + __vector_quad (&quad_acc)[num_acc], + Index strideA, + Index strideB, + Index offsetB, + Index k, + Index row, + Index col, + int extra_rows, + int extra_cols, + int mask_rows = 0xF, + int mask_cols = 0xF +) +{ + Packet8bf lhs; + Packet8bf rhs[num_acc]; + if(lhs_extra_rows) lhs = loadLhsBfloat16ExtraRows(indexA+k, strideA, row, extra_rows); + else lhs = loadLhsBfloat16(indexA + k*num_packets*8); //a packet of bfloat16 has 8 elements + for(int i = 0; i < num_acc; i++){ + if(!rhs_extra_cols) + rhs[i] = loadRhsBfloat16(indexB, strideB, i, k); + else{ + rhs[i] = loadRhsBfloat16ExtraCols(indexB, strideB, offsetB, col, i, k, extra_cols); + } + pgerMMAbfloat16(&(quad_acc[i]), rhs[i], lhs, mask_cols, mask_rows); + } +} + +template +void colLoopBody(Index* p_col, Index row, Index depth, Index cols, Index rows, int offset_row, int block_index, Packet4f pAlpha, const bfloat16* indexA, Index strideA, const bfloat16* blockB, Index strideB, Index offsetB, float* result, int extra_cols = 0, int extra_rows = 0, int mask_cols = 0xF, int mask_rows = 0xF) +{ + int col = *p_col; + int count; + int max, step, bound; + const bfloat16* indexB; + + if(num_acc == 1) bound = 0; + else bound = 1; + + if(rhsExtraCols){ + count = 0; + max = 1; + step = 1; + indexB = blockB; + } + else{ + count = col; + step = num_acc * 4; //each accumulator has 4 elements + max = cols/step; + indexB = blockB + 4*offsetB + strideB*col; + } + + while(count/step + bound < max){ + Index k = 0; + EIGEN_ALIGN32 float acc[num_acc][4][4]; + __vector_quad quad_acc[num_acc]; + + for(int i = 0; i < num_acc; i++) + __builtin_mma_xxsetaccz(&(quad_acc[i])); + + if(depth%2 != 0){ + KLoop(indexA, indexB, quad_acc, strideA, strideB, offsetB, k, row, col, extra_rows, extra_cols, mask_rows, mask_cols); + k = 1; + } + for(; k/2 < depth/2; k += 2){ + KLoop(indexA, indexB, quad_acc, strideA, strideB, offsetB, k, row, col, extra_rows, extra_cols, mask_rows, mask_cols); + } + for(int i = 0; i < num_acc; i++){ + __builtin_mma_disassemble_acc((void*)acc[i], &(quad_acc[i])); + if(lhsExtraRows){ + for(int x = 0; x < extra_cols; x++){ + for(int y 
= 0; y < extra_rows; y++){ + result[((col+i*4)+x)*rows + row + y] += acc[i][x][y]*(pAlpha[0]); + } + } + } + else{ + if(rhsExtraCols){ + for(int x = 0; x < cols-col; x++){ + scaleAndStore(result + ((col+i*4)+x)*rows + row + offset_row,acc[i][x], pAlpha); + } + } + else{ + for(int x = 0; x < 4; x++){ + scaleAndStore(result + ((col+i*4)+x)*rows + (block_index*16) + offset_row,acc[i][x], pAlpha); + } + } + } + } + count += step; + if(!rhsExtraCols) { + indexB += strideB*step; + col += step; + } + } + *p_col = col; +} + +template +void gemmMMAbfloat16(const DataMapper& res, const bfloat16* blockA, const bfloat16* blockB, Index rows, Index depth, Index cols, bfloat16 alpha, Index strideA, Index strideB, Index offsetA, Index offsetB) +{ + + if(rows == 0 || cols == 0 || depth == 0) return; + const Packet4f pAlpha = pset1(Eigen::bfloat16_impl::bfloat16_to_float(alpha)); + ei_declare_aligned_stack_constructed_variable(float, result, cols*rows, 0); + + for(int j = 0; j < cols; j++){ + for(int i = 0; i < rows; i++){ + result[j*rows + i] = res(i,j); + } + } + + Index row = 0; + Index col = 0; + + if( strideA == -1 ) strideA = depth; + if( strideB == -1 ) strideB = depth; + //Packing is done in blocks. + //There's 3 possible sizes of blocks + //Blocks of 8 columns with 16 elements (8x16) as col major + //Blocks of 8 columns with 8 elements (8x8) as col major. This happens when there's 16 > rows > 8 + //Blocks of 8 columns with <8 elements as row major. This happens when there's less than 8 remaining rows + + //Loop for LHS standard block (8x16) + int standard_block_size = 16; + const int standard_blocks_quantity = rows/standard_block_size; //Number of standard blocks + int bigSuffix = (2*8) * (strideA-offsetA-depth); + const bfloat16* indexA = blockA; + int block_index; + for(block_index = 0; block_index < standard_blocks_quantity; block_index++){ + indexA += 2*8*offsetA; + for(int offset_row = 0; offset_row < standard_block_size; offset_row += 4){ //This block size has 16 rows maximum + col = 0; + colLoopBody<5, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<4, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<3, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<2, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<1, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + if(cols > col){ + int extra_cols= cols-col; + int shift = (4-extra_cols>= 0) ? 
4-extra_cols: 0; + int mask_cols= 0xF >> shift; + //Remember: It doesnt make sense use multiple acc to extra_cols as we are unrolling col loop + colLoopBody<1, 16, 2, true>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result, extra_cols, 4, mask_cols, 0xF); + } + } + row += 16; + indexA += bigSuffix + 2*8*depth; + } + //LHS (8x8) block + if(rows - standard_blocks_quantity*16 >= 8){ + indexA += 1*8*offsetA + 2*8*offsetA; + for(int offset_row = 0; offset_row < 8; offset_row += 4){ + col = 0; + colLoopBody<5, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<4, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<3, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<2, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + colLoopBody<1, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result); + } + if(cols > col){ + int extra_cols= cols-col; + int shift = (4-extra_cols>= 0) ? 4-extra_cols: 0; + int mask_cols= 0xF >> shift; + + for(int offset_row = 0; offset_row < 8; offset_row += 4){ + colLoopBody<1, 8, 1, true>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result, extra_cols, 4, mask_cols, 0xF); + } + } //end extra cols + row += 8; + } + //extra rows + while(row < rows){ + int extra_rows = rows-row; + int shift = (4-extra_rows >= 0) ? 4-extra_rows : 0; + int mask_rows = 0xF >> shift; + int extra_rows_or_four = (extra_rows <= 4) ? extra_rows : 4; + + //This index is the beginning of remaining block. + //This last block for LHS is organized as RowMajor + col = 0; + colLoopBody<5, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows); + colLoopBody<4, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows); + colLoopBody<3, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows); + colLoopBody<2, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows); + colLoopBody<1, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows); + if(cols > col){ + int extra_cols= cols-col; + int shift = (4-extra_cols>= 0) ? 4-extra_cols: 0; + int mask_cols= 0xF >> shift; + int extra_cols_or_four = (extra_cols <= 4) ? 
extra_cols : 4; + + colLoopBody<1, 8, 1, true, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, extra_cols_or_four, extra_rows_or_four, mask_cols, mask_rows); + } + row += extra_rows_or_four; + } + + //Convert back to bfloat16 + for(col = 0; col/4 < cols/4; col += 4){ + int row; + for(row = 0; row/8 < rows/8; row += 8){ + //get and save block + PacketBlock block; + for(int j = 0; j < 4; j++){ + Packet4f temp_even, temp_odd; + EIGEN_ALIGN32 float even[4], odd[4]; + for(int i = 0; i < 4; i++){ + even[i] = result[(col + j)*rows + row + i*2]; + odd[i] = result[(col + j)*rows + row + i*2+1]; + } + temp_even = pload(even); + temp_odd = pload(odd); + block.packet[j] = F32ToBf16(temp_even, temp_odd); + } + + res.template storePacketBlock(row, col, block); + } + //extra rows + while(row < rows){ + for(int col_off = 0; col_off < 4; col_off++){ + res(row, col+col_off) = Eigen::bfloat16(result[(col+col_off)*rows+row]); + } + row++; + } + + } + //extra cols + while(col < cols){ + for(int r = 0; r < rows; r++){ + res(r, col) = Eigen::bfloat16(result[col*rows + r]); + } + col++; + } +} + + +} +} +#endif //EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h new file mode 100644 index 0000000..9d00b93 --- /dev/null +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h @@ -0,0 +1,2400 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2021 Chip Kerchner (chip.kerchner@ibm.com) +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
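// ---- Illustrative sketch (editorial, not part of the patch) ----
// The macro machinery in this file unrolls a single contract: for a
// column-major lhs, accumulate res[i] += alpha * sum_j lhs(i,j) * rhs[j],
// advancing i by N*ResPacketSize rows per unrolled step with a scalar
// tail for the leftover rows (visible at the end of gemv_col). A plain
// C++ rendering of that contract, with hypothetical names and dense
// column-major storage with leading dimension lhsStride assumed:
template <typename Scalar>
void sketch_gemv_col_reference(long rows, long cols,
                               const Scalar* lhs, long lhsStride,
                               const Scalar* rhs, Scalar* res, Scalar alpha)
{
  for (long i = 0; i < rows; ++i) {
    Scalar d0(0);
    for (long j = 0; j < cols; ++j)
      d0 += lhs[i + j * lhsStride] * rhs[j]; // cj.pmul handles conjugation in the real kernel
    res[i] += alpha * d0;                    // mirrors gemv_col's scalar tail loop
  }
}
// ----------------------------------------------------------------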
+ +#ifndef EIGEN_MATRIX_VECTOR_PRODUCT_ALTIVEC_H +#define EIGEN_MATRIX_VECTOR_PRODUCT_ALTIVEC_H + +#include "../../InternalHeaderCheck.h" + +#if defined(__MMA__) && !EIGEN_ALTIVEC_DISABLE_MMA +#if EIGEN_COMP_LLVM || (__GNUC__ > 10 || __GNUC_MINOR__ >= 3) +#define USE_GEMV_MMA +#endif + +#if !EIGEN_COMP_LLVM && (__GNUC__ == 10 && __GNUC_MINOR__ <= 3) +// Only allow one vector_pair in buggy gcc - gcc 10.3 has a bug +#define GCC_ONE_VECTORPAIR_BUG +#endif +#endif + +//#define USE_SLOWER_GEMV_MMA // MMA is currently not as fast as VSX in complex double GEMV (revisit when gcc is improved) + +//#define EIGEN_POWER_USE_GEMV_PREFETCH +#ifdef EIGEN_POWER_USE_GEMV_PREFETCH +#define EIGEN_POWER_GEMV_PREFETCH(p) prefetch(p) +#else +#define EIGEN_POWER_GEMV_PREFETCH(p) +#endif + +#ifdef __has_builtin +#if !__has_builtin(__builtin_vsx_assemble_pair) +#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair +#endif +#if !__has_builtin(__builtin_vsx_disassemble_pair) +#define __builtin_vsx_disassemble_pair __builtin_mma_disassemble_pair +#endif +#endif + +#if EIGEN_COMP_LLVM +#define GEMV_BUILDPAIR_MMA(dst, src1, src2) \ + __builtin_vsx_assemble_pair(&dst, (__vector unsigned char)src2, (__vector unsigned char)src1) +#else +#if (__GNUC__ <= 10) +#if (__GNUC_MINOR__ > 3) +#define GEMV_BUILDPAIR_MMA(dst, src1, src2) \ + __builtin_vsx_assemble_pair(&dst, (__vector unsigned char)src2, (__vector unsigned char)src1) +#else +#define GEMV_BUILDPAIR_MMA(dst, src1, src2) \ + __builtin_vsx_assemble_pair(&dst, (__vector unsigned char)src1, (__vector unsigned char)src2) +#endif +#else +#define GEMV_BUILDPAIR_MMA(dst, src1, src2) \ + __builtin_vsx_build_pair(&dst, (__vector unsigned char)src1, (__vector unsigned char)src2) +#endif +#endif + +#define GEMV_IS_COMPLEX_COMPLEX ((sizeof(LhsPacket) == 16) && (sizeof(RhsPacket) == 16)) +#define GEMV_IS_FLOAT (ResPacketSize == (16 / sizeof(float))) +#define GEMV_IS_SCALAR (sizeof(ResPacket) != 16) +#define GEMV_IS_COMPLEX_FLOAT (ResPacketSize == (16 / sizeof(std::complex))) + +/** \internal multiply and add and store results */ +template +EIGEN_ALWAYS_INLINE void storeMaddData(ResScalar* res, ResPacket& palpha, ResPacket& data) +{ + pstoreu(res, pmadd(data, palpha, ploadu(res))); +} + +template +EIGEN_ALWAYS_INLINE void storeMaddData(ResScalar* res, ResScalar& alpha, ResScalar& data) +{ + *res += (alpha * data); +} + +#define GEMV_UNROLL(func, N) \ + func(0, N) func(1, N) func(2, N) func(3, N) \ + func(4, N) func(5, N) func(6, N) func(7, N) + +#define GEMV_UNROLL_HALF(func, N) \ + func(0, 0, 1, N) func(1, 2, 3, N) func(2, 4, 5, N) func(3, 6, 7, N) + +#define GEMV_GETN(N) (((N) * ResPacketSize) >> 2) + +#define GEMV_LOADPACKET_COL(iter) \ + lhs.template load(i + ((iter) * LhsPacketSize), j) + +#ifdef USE_GEMV_MMA +#define GEMV_UNROLL3(func, N, which) \ + func(0, N, which) func(1, N, which) func(2, N, which) func(3, N, which) \ + func(4, N, which) func(5, N, which) func(6, N, which) func(7, N, which) + +#define GEMV_UNUSED_VAR(iter, N, which) \ + if (GEMV_GETN(N) <= iter) { \ + EIGEN_UNUSED_VARIABLE(which##iter); \ + } + +#define GEMV_UNUSED_EXTRA_VAR(iter, N, which) \ + if (N <= iter) { \ + EIGEN_UNUSED_VARIABLE(which##iter); \ + } + +#define GEMV_UNUSED_EXTRA(N, which) \ + GEMV_UNROLL3(GEMV_UNUSED_EXTRA_VAR, N, which) + +#define GEMV_UNUSED(N, which) \ + GEMV_UNROLL3(GEMV_UNUSED_VAR, N, which) + +#define GEMV_INIT_MMA(iter, N) \ + if (GEMV_GETN(N) > iter) { \ + __builtin_mma_xxsetaccz(&e##iter); \ + } + +#if EIGEN_COMP_LLVM +#define GEMV_LOADPAIR_COL_MMA(iter1, iter2) \ 
+ GEMV_BUILDPAIR_MMA(b##iter1, GEMV_LOADPACKET_COL(iter2), GEMV_LOADPACKET_COL((iter2) + 1)); +#else +#define GEMV_LOADPAIR_COL_MMA(iter1, iter2) \ + const LhsScalar& src##iter1 = lhs(i + ((iter1 * 32) / sizeof(LhsScalar)), j); \ + b##iter1 = *reinterpret_cast<__vector_pair *>(const_cast(&src##iter1)); +#endif + +#define GEMV_LOAD1A_COL_MMA(iter, N) \ + if (GEMV_GETN(N) > iter) { \ + if (GEMV_IS_FLOAT) { \ + g##iter = GEMV_LOADPACKET_COL(iter); \ + EIGEN_UNUSED_VARIABLE(b##iter); \ + } else { \ + GEMV_LOADPAIR_COL_MMA(iter, iter << 1) \ + EIGEN_UNUSED_VARIABLE(g##iter); \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(b##iter); \ + EIGEN_UNUSED_VARIABLE(g##iter); \ + } + +#define GEMV_WORK1A_COL_MMA(iter, N) \ + if (GEMV_GETN(N) > iter) { \ + if (GEMV_IS_FLOAT) { \ + pger_vecMMA_acc(&e##iter, a0, g##iter); \ + } else { \ + pger_vecMMA_acc(&e##iter, b##iter, a0); \ + } \ + } + +#define GEMV_LOAD1B_COL_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN(N) > iter1) { \ + if (GEMV_IS_FLOAT) { \ + GEMV_LOADPAIR_COL_MMA(iter2, iter2) \ + EIGEN_UNUSED_VARIABLE(b##iter3); \ + } else { \ + GEMV_LOADPAIR_COL_MMA(iter2, iter2 << 1) \ + GEMV_LOADPAIR_COL_MMA(iter3, iter3 << 1) \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(b##iter2); \ + EIGEN_UNUSED_VARIABLE(b##iter3); \ + } \ + EIGEN_UNUSED_VARIABLE(g##iter2); \ + EIGEN_UNUSED_VARIABLE(g##iter3); + +#define GEMV_WORK1B_COL_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN(N) > iter1) { \ + if (GEMV_IS_FLOAT) { \ + LhsPacket h[2]; \ + __builtin_vsx_disassemble_pair(reinterpret_cast(h), &b##iter2); \ + pger_vecMMA_acc(&e##iter2, a0, h[0]); \ + pger_vecMMA_acc(&e##iter3, a0, h[1]); \ + } else { \ + pger_vecMMA_acc(&e##iter2, b##iter2, a0); \ + pger_vecMMA_acc(&e##iter3, b##iter3, a0); \ + } \ + } + +#if EIGEN_COMP_LLVM +#define GEMV_LOAD_COL_MMA(N) \ + if (GEMV_GETN(N) > 1) { \ + GEMV_UNROLL_HALF(GEMV_LOAD1B_COL_MMA, (N >> 1)) \ + } else { \ + GEMV_UNROLL(GEMV_LOAD1A_COL_MMA, N) \ + } + +#define GEMV_WORK_COL_MMA(N) \ + if (GEMV_GETN(N) > 1) { \ + GEMV_UNROLL_HALF(GEMV_WORK1B_COL_MMA, (N >> 1)) \ + } else { \ + GEMV_UNROLL(GEMV_WORK1A_COL_MMA, N) \ + } +#else +#define GEMV_LOAD_COL_MMA(N) \ + GEMV_UNROLL(GEMV_LOAD1A_COL_MMA, N) + +#define GEMV_WORK_COL_MMA(N) \ + GEMV_UNROLL(GEMV_WORK1A_COL_MMA, N) +#endif + +#define GEMV_DISASSEMBLE_MMA(iter, N) \ + if (GEMV_GETN(N) > iter) { \ + __builtin_mma_disassemble_acc(&result##iter.packet, &e##iter); \ + if (!GEMV_IS_FLOAT) { \ + result##iter.packet[0][1] = result##iter.packet[1][0]; \ + result##iter.packet[2][1] = result##iter.packet[3][0]; \ + } \ + } + +#define GEMV_LOADPAIR2_COL_MMA(iter1, iter2) \ + b##iter1 = *reinterpret_cast<__vector_pair *>(res + i + ((iter2) * ResPacketSize)); + +#define GEMV_LOAD2_COL_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN(N) > iter1) { \ + if (GEMV_IS_FLOAT) { \ + GEMV_LOADPAIR2_COL_MMA(iter2, iter2); \ + EIGEN_UNUSED_VARIABLE(b##iter3); \ + } else { \ + GEMV_LOADPAIR2_COL_MMA(iter2, iter2 << 1); \ + GEMV_LOADPAIR2_COL_MMA(iter3, iter3 << 1); \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(b##iter2); \ + EIGEN_UNUSED_VARIABLE(b##iter3); \ + } + +#if EIGEN_COMP_LLVM +#define GEMV_WORKPAIR2_COL_MMA(iter2, iter3, iter4) \ + ResPacket f##iter2[2]; \ + __builtin_vsx_disassemble_pair(reinterpret_cast(f##iter2), &b##iter2); \ + f##iter2[0] = pmadd(result##iter2.packet[0], palpha, f##iter2[0]); \ + f##iter2[1] = pmadd(result##iter3.packet[(iter2 == iter3) ? 
2 : 0], palpha, f##iter2[1]); \ + GEMV_BUILDPAIR_MMA(b##iter2, f##iter2[0], f##iter2[1]); +#else +#define GEMV_WORKPAIR2_COL_MMA(iter2, iter3, iter4) \ + if (GEMV_IS_FLOAT) { \ + __asm__ ("xvmaddasp %0,%x1,%x3\n\txvmaddasp %L0,%x2,%x3" : "+&d" (b##iter2) : "wa" (result##iter3.packet[0]), "wa" (result##iter2.packet[0]), "wa" (palpha)); \ + } else { \ + __asm__ ("xvmaddadp %0,%x1,%x3\n\txvmaddadp %L0,%x2,%x3" : "+&d" (b##iter2) : "wa" (result##iter2.packet[2]), "wa" (result##iter2.packet[0]), "wa" (palpha)); \ + } +#endif + +#define GEMV_WORK2_COL_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN(N) > iter1) { \ + if (GEMV_IS_FLOAT) { \ + GEMV_WORKPAIR2_COL_MMA(iter2, iter3, iter2); \ + } else { \ + GEMV_WORKPAIR2_COL_MMA(iter2, iter2, iter2 << 1); \ + GEMV_WORKPAIR2_COL_MMA(iter3, iter3, iter3 << 1); \ + } \ + } + +#define GEMV_STOREPAIR2_COL_MMA(iter1, iter2) \ + *reinterpret_cast<__vector_pair *>(res + i + ((iter2) * ResPacketSize)) = b##iter1; + +#define GEMV_STORE_COL_MMA(iter, N) \ + if (GEMV_GETN(N) > iter) { \ + if (GEMV_IS_FLOAT) { \ + storeMaddData(res + i + (iter * ResPacketSize), palpha, result##iter.packet[0]); \ + } else { \ + GEMV_LOADPAIR2_COL_MMA(iter, iter << 1) \ + GEMV_WORKPAIR2_COL_MMA(iter, iter, iter << 1) \ + GEMV_STOREPAIR2_COL_MMA(iter, iter << 1) \ + } \ + } + +#define GEMV_STORE2_COL_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN(N) > iter1) { \ + if (GEMV_IS_FLOAT) { \ + GEMV_STOREPAIR2_COL_MMA(iter2, iter2); \ + } else { \ + GEMV_STOREPAIR2_COL_MMA(iter2, iter2 << 1) \ + GEMV_STOREPAIR2_COL_MMA(iter3, iter3 << 1) \ + } \ + } + +#define GEMV_PROCESS_COL_ONE_MMA(N) \ + GEMV_UNROLL(GEMV_INIT_MMA, N) \ + Index j = j2; \ + __vector_pair b0, b1, b2, b3, b4, b5, b6, b7; \ + do { \ + LhsPacket g0, g1, g2, g3, g4, g5, g6, g7; \ + RhsPacket a0 = pset1(rhs2(j, 0)); \ + GEMV_UNROLL(GEMV_PREFETCH, N) \ + GEMV_LOAD_COL_MMA(N) \ + GEMV_WORK_COL_MMA(N) \ + } while (++j < jend); \ + GEMV_UNROLL(GEMV_DISASSEMBLE_MMA, N) \ + if (GEMV_GETN(N) <= 1) { \ + GEMV_UNROLL(GEMV_STORE_COL_MMA, N) \ + } else { \ + GEMV_UNROLL_HALF(GEMV_LOAD2_COL_MMA, (N >> 1)) \ + GEMV_UNROLL_HALF(GEMV_WORK2_COL_MMA, (N >> 1)) \ + GEMV_UNROLL_HALF(GEMV_STORE2_COL_MMA, (N >> 1)) \ + } \ + i += (ResPacketSize * N); +#endif + +#define GEMV_INIT(iter, N) \ + if (N > iter) { \ + c##iter = pset1(ResScalar(0)); \ + } else { \ + EIGEN_UNUSED_VARIABLE(c##iter); \ + } + +#ifdef EIGEN_POWER_USE_GEMV_PREFETCH +#define GEMV_PREFETCH(iter, N) \ + if (GEMV_GETN(N) > ((iter >> 1) + ((N >> 1) * (iter & 1)))) { \ + lhs.prefetch(i + (iter * LhsPacketSize) + prefetch_dist, j); \ + } +#else +#define GEMV_PREFETCH(iter, N) +#endif + +#define GEMV_WORK_COL(iter, N) \ + if (N > iter) { \ + c##iter = pcj.pmadd(GEMV_LOADPACKET_COL(iter), a0, c##iter); \ + } + +#define GEMV_STORE_COL(iter, N) \ + if (N > iter) { \ + pstoreu(res + i + (iter * ResPacketSize), pmadd(c##iter, palpha, ploadu(res + i + (iter * ResPacketSize)))); \ + } + +/** \internal main macro for gemv_col - initialize accumulators, multiply and add inputs, and store results */ +#define GEMV_PROCESS_COL_ONE(N) \ + GEMV_UNROLL(GEMV_INIT, N) \ + Index j = j2; \ + do { \ + RhsPacket a0 = pset1(rhs2(j, 0)); \ + GEMV_UNROLL(GEMV_PREFETCH, N) \ + GEMV_UNROLL(GEMV_WORK_COL, N) \ + } while (++j < jend); \ + GEMV_UNROLL(GEMV_STORE_COL, N) \ + i += (ResPacketSize * N); + +#ifdef USE_GEMV_MMA +#define GEMV_PROCESS_COL(N) \ + GEMV_PROCESS_COL_ONE_MMA(N) +#else +#define GEMV_PROCESS_COL(N) \ + GEMV_PROCESS_COL_ONE(N) +#endif + +/** \internal perform a matrix multiply and accumulate of 
packet a and packet b */ +#ifdef USE_GEMV_MMA +template +EIGEN_ALWAYS_INLINE void pger_vecMMA_acc(__vector_quad* acc, const RhsPacket& a, const LhsPacket& b) +{ + if (accumulate) + { + __builtin_mma_xvf32gerpp(acc, (__vector unsigned char)a, (__vector unsigned char)b); + } + else + { + __builtin_mma_xvf32ger(acc, (__vector unsigned char)a, (__vector unsigned char)b); + } +} + +/** \internal perform a matrix multiply and accumulate of vector_pair a and packet b */ +template +EIGEN_ALWAYS_INLINE void pger_vecMMA_acc(__vector_quad* acc, __vector_pair& a, const LhsPacket& b) +{ + if (accumulate) + { + __builtin_mma_xvf64gerpp(acc, a, (__vector unsigned char)b); + } + else + { + __builtin_mma_xvf64ger(acc, a, (__vector unsigned char)b); + } +} +#endif + +template +EIGEN_STRONG_INLINE void gemv_col( + Index rows, Index cols, + const LhsMapper& alhs, + const RhsMapper& rhs, + ResScalar* res, Index resIncr, + ResScalar alpha) +{ + typedef gemv_traits Traits; + + typedef typename Traits::LhsPacket LhsPacket; + typedef typename Traits::RhsPacket RhsPacket; + typedef typename Traits::ResPacket ResPacket; + + EIGEN_UNUSED_VARIABLE(resIncr); + eigen_internal_assert(resIncr == 1); + + // The following copy tells the compiler that lhs's attributes are not modified outside this function + // This helps GCC to generate proper code. + LhsMapper lhs(alhs); + RhsMapper rhs2(rhs); + + conj_helper cj; + conj_helper pcj; + + const Index lhsStride = lhs.stride(); + // TODO: for padded aligned inputs, we could enable aligned reads + enum { + LhsAlignment = Unaligned, + ResPacketSize = Traits::ResPacketSize, + LhsPacketSize = Traits::LhsPacketSize, + RhsPacketSize = Traits::RhsPacketSize, + }; + +#ifndef GCC_ONE_VECTORPAIR_BUG + const Index n8 = rows - 8 * ResPacketSize + 1; + const Index n4 = rows - 4 * ResPacketSize + 1; + const Index n2 = rows - 2 * ResPacketSize + 1; +#endif + const Index n1 = rows - 1 * ResPacketSize + 1; +#ifdef EIGEN_POWER_USE_GEMV_PREFETCH + const Index prefetch_dist = 64 * LhsPacketSize; +#endif + + // TODO: improve the following heuristic: + const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 
16 : 8); + ResPacket palpha = pset1(alpha); + + for (Index j2 = 0; j2 < cols; j2 += block_cols) + { + Index jend = numext::mini(j2 + block_cols, cols); + Index i = 0; + ResPacket c0, c1, c2, c3, c4, c5, c6, c7; +#ifdef USE_GEMV_MMA + __vector_quad e0, e1, e2, e3, e4, e5, e6, e7; + PacketBlock result0, result1, result2, result3, result4, result5, result6, result7; + GEMV_UNUSED(8, e) + GEMV_UNUSED(8, result) + GEMV_UNUSED_EXTRA(1, c) +#endif +#ifndef GCC_ONE_VECTORPAIR_BUG + while (i < n8) + { + GEMV_PROCESS_COL(8) + } + if (i < n4) + { + GEMV_PROCESS_COL(4) + } + if (i < n2) + { + GEMV_PROCESS_COL(2) + } + if (i < n1) +#else + while (i < n1) +#endif + { + GEMV_PROCESS_COL_ONE(1) + } + for (;i < rows;++i) + { + ResScalar d0(0); + Index j = j2; + do { + d0 += cj.pmul(lhs(i, j), rhs2(j, 0)); + } while (++j < jend); + res[i] += alpha * d0; + } + } +} + +const Packet16uc p16uc_COMPLEX32_XORFLIP = { 0x44,0x55,0x66,0x77, 0x00,0x11,0x22,0x33, 0xcc,0xdd,0xee,0xff, 0x88,0x99,0xaa,0xbb }; +const Packet16uc p16uc_COMPLEX64_XORFLIP = { 0x88,0x99,0xaa,0xbb, 0xcc,0xdd,0xee,0xff, 0x00,0x11,0x22,0x33, 0x44,0x55,0x66,0x77 }; + +#ifdef _BIG_ENDIAN +const Packet16uc p16uc_COMPLEX32_CONJ_XOR = { 0x00,0x00,0x00,0x00, 0x80,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 }; +const Packet16uc p16uc_COMPLEX64_CONJ_XOR = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x80,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 }; +const Packet16uc p16uc_COMPLEX32_CONJ_XOR2 = { 0x80,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x80,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 }; +const Packet16uc p16uc_COMPLEX64_CONJ_XOR2 = { 0x80,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 }; +const Packet16uc p16uc_COMPLEX32_NEGATE = { 0x80,0x00,0x00,0x00, 0x80,0x00,0x00,0x00, 0x80,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 }; +const Packet16uc p16uc_COMPLEX64_NEGATE = { 0x80,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x80,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 }; +#else +const Packet16uc p16uc_COMPLEX32_CONJ_XOR = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x80 }; +const Packet16uc p16uc_COMPLEX64_CONJ_XOR = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x80 }; +const Packet16uc p16uc_COMPLEX32_CONJ_XOR2 = { 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x00 }; +const Packet16uc p16uc_COMPLEX64_CONJ_XOR2 = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 }; +const Packet16uc p16uc_COMPLEX32_NEGATE = { 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x80 }; +const Packet16uc p16uc_COMPLEX64_NEGATE = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x80, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x80 }; +#endif + +#ifdef _BIG_ENDIAN +#define COMPLEX_DELTA 0 +#else +#define COMPLEX_DELTA 2 +#endif + +/** \internal packet conjugate (same as pconj but uses the constants in pcplxflipconj for better code generation) */ +EIGEN_ALWAYS_INLINE Packet2cf pconj2(const Packet2cf& a) { + return Packet2cf(pxor(a.v, reinterpret_cast(p16uc_COMPLEX32_CONJ_XOR))); +} + +EIGEN_ALWAYS_INLINE Packet1cd pconj2(const Packet1cd& a) { + return Packet1cd(pxor(a.v, reinterpret_cast(p16uc_COMPLEX64_CONJ_XOR))); +} + +/** \internal packet conjugate with real & imaginary operation inverted */ +EIGEN_ALWAYS_INLINE Packet2cf pconjinv(const Packet2cf& a) { +#ifdef __POWER8_VECTOR__ + return Packet2cf(Packet4f(vec_neg(Packet2d(a.v)))); +#else + return Packet2cf(pxor(a.v, reinterpret_cast(p16uc_COMPLEX32_CONJ_XOR2))); +#endif +} + 
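// ---- Illustrative sketch (editorial, not part of the patch) ----
// The CONJ_XOR/NEGATE tables above all exploit the same IEEE-754 fact:
// negating a float flips only its sign bit, so conj(a + bi) is a vector
// XOR against a mask whose 0x80 bytes line up with the sign byte of the
// imaginary lanes. A scalar rendering of the trick:
#include <cstdint>
#include <cstring>

static inline float sketch_flip_sign(float x)
{
  std::uint32_t u;
  std::memcpy(&u, &x, sizeof u);
  u ^= UINT32_C(0x80000000);  // the bit the 0x80,0x00,0x00,0x00 mask bytes target
  std::memcpy(&x, &u, sizeof x);
  return x;                   // e.g. sketch_flip_sign(1.5f) == -1.5f
}
// ----------------------------------------------------------------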
+EIGEN_ALWAYS_INLINE Packet1cd pconjinv(const Packet1cd& a) { + return Packet1cd(pxor(a.v, reinterpret_cast(p16uc_COMPLEX64_CONJ_XOR2))); +} + +#if defined(_ARCH_PWR8) && (!EIGEN_COMP_LLVM || __clang_major__ >= 12) +#define PERMXOR_GOOD // Clang had a bug with vec_permxor and endianness prior to version 12 +#endif + +/** \internal flip the real & imaginary results and packet conjugate */ +EIGEN_ALWAYS_INLINE Packet2cf pcplxflipconj(Packet2cf a) +{ +#ifdef PERMXOR_GOOD + return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR, p16uc_COMPLEX32_XORFLIP))); +#else + return pcplxflip(pconj2(a)); +#endif +} + +EIGEN_ALWAYS_INLINE Packet1cd pcplxflipconj(Packet1cd a) +{ +#ifdef PERMXOR_GOOD + return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR, p16uc_COMPLEX64_XORFLIP))); +#else + return pcplxflip(pconj2(a)); +#endif +} + +/** \internal packet conjugate and flip the real & imaginary results */ +EIGEN_ALWAYS_INLINE Packet2cf pcplxconjflip(Packet2cf a) +{ +#ifdef PERMXOR_GOOD + return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR2, p16uc_COMPLEX32_XORFLIP))); +#else + return pconj2(pcplxflip(a)); +#endif +} + +EIGEN_ALWAYS_INLINE Packet1cd pcplxconjflip(Packet1cd a) +{ +#ifdef PERMXOR_GOOD + return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR2, p16uc_COMPLEX64_XORFLIP))); +#else + return pconj2(pcplxflip(a)); +#endif +} + +/** \internal packet negate */ +EIGEN_ALWAYS_INLINE Packet2cf pnegate2(Packet2cf a) +{ +#ifdef __POWER8_VECTOR__ + return Packet2cf(vec_neg(a.v)); +#else + return Packet2cf(pxor(a.v, reinterpret_cast(p16uc_COMPLEX32_NEGATE))); +#endif +} + +EIGEN_ALWAYS_INLINE Packet1cd pnegate2(Packet1cd a) +{ +#ifdef __POWER8_VECTOR__ + return Packet1cd(vec_neg(a.v)); +#else + return Packet1cd(pxor(a.v, reinterpret_cast(p16uc_COMPLEX64_NEGATE))); +#endif +} + +/** \internal flip the real & imaginary results and negate */ +EIGEN_ALWAYS_INLINE Packet2cf pcplxflipnegate(Packet2cf a) +{ +#ifdef PERMXOR_GOOD + return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_NEGATE, p16uc_COMPLEX32_XORFLIP))); +#else + return pcplxflip(pnegate2(a)); +#endif +} + +EIGEN_ALWAYS_INLINE Packet1cd pcplxflipnegate(Packet1cd a) +{ +#ifdef PERMXOR_GOOD + return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_NEGATE, p16uc_COMPLEX64_XORFLIP))); +#else + return pcplxflip(pnegate2(a)); +#endif +} + +/** \internal flip the real & imaginary results */ +EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2(Packet2cf a) +{ + return Packet2cf(Packet4f(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX32_XORFLIP))); +} + +EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2(Packet1cd a) +{ +#ifdef __VSX__ + return Packet1cd(__builtin_vsx_xxpermdi(a.v, a.v, 2)); +#else + return Packet1cd(Packet2d(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX64_XORFLIP))); +#endif +} + +/** \internal load half a vector with one complex value */ +EIGEN_ALWAYS_INLINE Packet4f pload_complex_half(std::complex* src) +{ + Packet4f t; +#ifdef __VSX__ + // Load float64/two float32 (doubleword alignment) + __asm__("lxsdx %x0,%y1" : "=wa" (t) : "Z" (*src)); +#else + *reinterpret_cast*>(reinterpret_cast(&t) + COMPLEX_DELTA) = *src; +#endif + return t; +} + +/** \internal load two vectors from the real and imaginary portions of a complex value */ +template +EIGEN_ALWAYS_INLINE void pload_realimag(RhsScalar* src, Packet4f& r, Packet4f& i) +{ +#ifdef _ARCH_PWR9 + __asm__("lxvwsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast(src) 
+ 0))); + __asm__("lxvwsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast(src) + 1))); +#else + Packet4f t = pload_complex_half(src); + r = vec_splat(t, COMPLEX_DELTA + 0); + i = vec_splat(t, COMPLEX_DELTA + 1); +#endif +} + +template +EIGEN_ALWAYS_INLINE void pload_realimag(RhsScalar* src, Packet2d& r, Packet2d& i) +{ +#ifdef __VSX__ + __asm__("lxvdsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast(src) + 0))); + __asm__("lxvdsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast(src) + 1))); +#else + Packet2d t = ploadu(reinterpret_cast(src)); + r = vec_splat(t, 0); + i = vec_splat(t, 1); +#endif +} + +#ifndef __POWER8_VECTOR__ +const Packet16uc p16uc_MERGEE = { 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B }; + +const Packet16uc p16uc_MERGEO = { 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F }; +#endif + +/** \internal load two vectors from the interleaved real & imaginary values of src */ +template +EIGEN_ALWAYS_INLINE void pload_realimag_row(RhsScalar* src, Packet4f& r, Packet4f& i) +{ + Packet4f t = ploadu(reinterpret_cast(src)); +#ifdef __POWER8_VECTOR__ + r = vec_mergee(t, t); + i = vec_mergeo(t, t); +#else + r = vec_perm(t, t, p16uc_MERGEE); + i = vec_perm(t, t, p16uc_MERGEO); +#endif +} + +template +EIGEN_ALWAYS_INLINE void pload_realimag_row(RhsScalar* src, Packet2d& r, Packet2d& i) +{ + return pload_realimag(src, r, i); +} + +/** \internal load and splat a complex value into a vector - column-wise */ +EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine(std::complex* src) +{ +#ifdef __VSX__ + Packet4f ret; + __asm__("lxvdsx %x0,%y1" : "=wa" (ret) : "Z" (*(reinterpret_cast(src) + 0))); + return ret; +#else + return Packet4f(ploaddup(reinterpret_cast(src))); +#endif +} + +EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine(std::complex* src) +{ + return ploadu(src).v; +} + +/** \internal load a complex value into a vector - row-wise */ +EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine_row(std::complex* src) +{ + return ploadu(src).v; +} + +EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine_row(std::complex* src) +{ + return ploadu(src).v; +} + +/** \internal load a scalar or a vector from complex location */ +template +EIGEN_ALWAYS_INLINE Packet4f pload_complex(std::complex* src) +{ + if (GEMV_IS_SCALAR) { + return pload_complex_half(src); + } + else + { + return ploadu(reinterpret_cast(src)); + } +} + +template +EIGEN_ALWAYS_INLINE Packet2d pload_complex(std::complex* src) +{ + return ploadu(reinterpret_cast(src)); +} + +/** \internal load from a complex vector and convert to a real vector */ +template +EIGEN_ALWAYS_INLINE Packet4f pload_complex(Packet2cf* src) +{ + return src->v; +} + +template +EIGEN_ALWAYS_INLINE Packet2d pload_complex(Packet1cd* src) +{ + return src->v; +} + +/** \internal load a full vector from complex location - column-wise */ +EIGEN_ALWAYS_INLINE Packet4f pload_complex_full(std::complex* src) +{ + return Packet4f(ploaddup(reinterpret_cast(src))); +} + +EIGEN_ALWAYS_INLINE Packet2d pload_complex_full(std::complex* src) +{ + return ploadu(src).v; +} + +/** \internal load a full vector from complex location - row-wise */ +EIGEN_ALWAYS_INLINE Packet4f pload_complex_full_row(std::complex* src) +{ + return ploadu(src).v; +} + +EIGEN_ALWAYS_INLINE Packet2d pload_complex_full_row(std::complex* src) +{ + return pload_complex_full(src); +} + +/** \internal load a vector from a real-only scalar location - column-wise */ +EIGEN_ALWAYS_INLINE Packet4f pload_real(float* src) 
+{ + return pset1(*src); +} + +EIGEN_ALWAYS_INLINE Packet2d pload_real(double* src) +{ + return pset1(*src); +} + +EIGEN_ALWAYS_INLINE Packet4f pload_real(Packet4f& src) +{ + return src; +} + +EIGEN_ALWAYS_INLINE Packet2d pload_real(Packet2d& src) +{ + return src; +} + +/** \internal load a vector from a real-only vector location */ +EIGEN_ALWAYS_INLINE Packet4f pload_real_full(float* src) +{ + Packet4f ret = ploadu(src); + return vec_mergeh(ret, ret); +} + +EIGEN_ALWAYS_INLINE Packet2d pload_real_full(double* src) +{ + return pload_real(src); +} + +EIGEN_ALWAYS_INLINE Packet4f pload_real_full(std::complex* src) +{ + return pload_complex_full(src); // Just for compilation +} + +EIGEN_ALWAYS_INLINE Packet2d pload_real_full(std::complex* src) +{ + return pload_complex_full(src); // Just for compilation +} + +/** \internal load a vector from a real-only scalar location - row-wise */ +template +EIGEN_ALWAYS_INLINE Packet4f pload_real_row(float* src) +{ + if (GEMV_IS_SCALAR) { + return pload_real_full(src); + } + else { + return ploadu(src); + } +} + +template +EIGEN_ALWAYS_INLINE Packet2d pload_real_row(double* src) +{ + return pload_real(src); +} + +EIGEN_ALWAYS_INLINE Packet2cf padd(Packet2cf& a, std::complex& b) +{ + EIGEN_UNUSED_VARIABLE(b); + return a; // Just for compilation +} + +EIGEN_ALWAYS_INLINE Packet1cd padd(Packet1cd& a, std::complex& b) +{ + EIGEN_UNUSED_VARIABLE(b); + return a; // Just for compilation +} + +/** \internal set a scalar from complex location */ +template +EIGEN_ALWAYS_INLINE Scalar pset1_realimag(ResScalar& alpha, int which, int conj) +{ + return (which) ? ((conj) ? -alpha.real() : alpha.real()) : ((conj) ? -alpha.imag() : alpha.imag()); +} + +/** \internal set a vector from complex location */ +template +EIGEN_ALWAYS_INLINE Packet2cf pset1_complex(std::complex& alpha) +{ + Packet2cf ret; + ret.v[COMPLEX_DELTA + 0] = pset1_realimag(alpha, (which & 0x01), (which & 0x04)); + ret.v[COMPLEX_DELTA + 1] = pset1_realimag(alpha, (which & 0x02), (which & 0x08)); + ret.v[2 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 0]; + ret.v[3 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 1]; + return ret; +} + +template +EIGEN_ALWAYS_INLINE Packet1cd pset1_complex(std::complex& alpha) +{ + Packet1cd ret; + ret.v[0] = pset1_realimag(alpha, (which & 0x01), (which & 0x04)); + ret.v[1] = pset1_realimag(alpha, (which & 0x02), (which & 0x08)); + return ret; +} + +/** \internal zero out a vector for real or complex forms */ +template +EIGEN_ALWAYS_INLINE Packet pset_zero() +{ + return pset1(__UNPACK_TYPE__(Packet)(0)); +} + +template<> +EIGEN_ALWAYS_INLINE Packet2cf pset_zero() +{ + return Packet2cf(pset1(float(0))); +} + +template<> +EIGEN_ALWAYS_INLINE Packet1cd pset_zero() +{ + return Packet1cd(pset1(double(0))); +} + +/** \internal initialize a vector from another vector */ +template +EIGEN_ALWAYS_INLINE Packet pset_init(Packet& c1) +{ + if (GEMV_IS_COMPLEX_COMPLEX) { + EIGEN_UNUSED_VARIABLE(c1); + return pset_zero(); + } + else + { + return c1; // Intentionally left uninitialized + } +} + +template +struct alpha_store +{ + alpha_store(ResScalar& alpha) { + separate.r = pset1_complex(alpha); + separate.i = pset1_complex(alpha); + } + struct ri { + PResPacket r; + PResPacket i; + } separate; +}; + +/** \internal multiply and add for complex math */ +template +EIGEN_ALWAYS_INLINE ScalarPacket pmadd_complex(ScalarPacket& c0, ScalarPacket& c2, ScalarPacket& c4, AlphaData& b0) +{ + return pmadd(c2, b0.separate.i.v, pmadd(c0, b0.separate.r.v, c4)); +} + +/** \internal store and madd for complex math */ 
+template +EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex(PResPacket& c0, AlphaData& b0, ResScalar* res) +{ + PResPacket c2 = pcplxflipconj(c0); + if (GEMV_IS_SCALAR) { + ScalarPacket c4 = ploadu(reinterpret_cast(res)); + ScalarPacket c3 = pmadd_complex(c0.v, c2.v, c4, b0); + pstoreu(reinterpret_cast(res), c3); + } else { + ScalarPacket c4 = pload_complex(res); + PResPacket c3 = PResPacket(pmadd_complex(c0.v, c2.v, c4, b0)); + pstoreu(res, c3); + } +} + +template +EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex(PResPacket& c0, PResPacket& c1, AlphaData& b0, ResScalar* res) +{ + PResPacket c2 = pcplxflipconj(c0); + PResPacket c3 = pcplxflipconj(c1); +#if !defined(_ARCH_PWR10) + ScalarPacket c4 = pload_complex(res + (iter2 * ResPacketSize)); + ScalarPacket c5 = pload_complex(res + ((iter2 + 1) * ResPacketSize)); + PResPacket c6 = PResPacket(pmadd_complex(c0.v, c2.v, c4, b0)); + PResPacket c7 = PResPacket(pmadd_complex(c1.v, c3.v, c5, b0)); + pstoreu(res + (iter2 * ResPacketSize), c6); + pstoreu(res + ((iter2 + 1) * ResPacketSize), c7); +#else + __vector_pair a = *reinterpret_cast<__vector_pair *>(res + (iter2 * ResPacketSize)); +#if EIGEN_COMP_LLVM + PResPacket c6[2]; + __builtin_vsx_disassemble_pair(reinterpret_cast(c6), &a); + c6[0] = PResPacket(pmadd_complex(c0.v, c2.v, c6[0].v, b0)); + c6[1] = PResPacket(pmadd_complex(c1.v, c3.v, c6[1].v, b0)); + GEMV_BUILDPAIR_MMA(a, c6[0].v, c6[1].v); +#else + if (GEMV_IS_COMPLEX_FLOAT) { + __asm__ ("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.r.v), "wa" (c0.v), "wa" (c1.v)); + __asm__ ("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.i.v), "wa" (c2.v), "wa" (c3.v)); + } else { + __asm__ ("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.r.v), "wa" (c0.v), "wa" (c1.v)); + __asm__ ("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.i.v), "wa" (c2.v), "wa" (c3.v)); + } +#endif + *reinterpret_cast<__vector_pair *>(res + (iter2 * ResPacketSize)) = a; +#endif +} + +/** \internal load lhs packet */ +template +EIGEN_ALWAYS_INLINE LhsPacket loadLhsPacket(LhsMapper& lhs, Index i, Index j) +{ + if (sizeof(Scalar) == sizeof(LhsScalar)) { + const LhsScalar& src = lhs(i + 0, j); + return LhsPacket(pload_real_full(const_cast(&src))); + } + return lhs.template load(i + 0, j); +} + +/** \internal madd for complex times complex */ +template +EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_complex(RealPacket& a, RealPacket& b, RealPacket& c) +{ + if (ConjugateLhs && ConjugateRhs) { + return vec_madd(a, pconj2(ComplexPacket(b)).v, c); + } + else if (Negate && !ConjugateLhs && ConjugateRhs) { + return vec_nmsub(a, b, c); + } + else { + return vec_madd(a, b, c); + } +} + +/** \internal madd for complex times real */ +template +EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_real(RealPacket& a, RealPacket& b, RealPacket& c) +{ + if (Conjugate) { + return vec_madd(a, pconj2(ComplexPacket(b)).v, c); + } + else { + return vec_madd(a, b, c); + } +} + +template +EIGEN_ALWAYS_INLINE void gemv_mult_generic(LhsPacket& a0, RhsScalar* b, PResPacket& c0) +{ + conj_helper pcj; + RhsPacket b0; + if (StorageOrder == ColMajor) { + b0 = pset1(*b); + } + else { + b0 = ploadu(b); + } + c0 = pcj.pmadd(a0, b0, c0); +} + +/** \internal core multiply operation for vectors - complex times complex */ +template +EIGEN_ALWAYS_INLINE void gemv_mult_complex_complex(LhsPacket& a0, RhsScalar* b, PResPacket& c0, ResPacket& c1) +{ + ScalarPacket br, bi; + if (StorageOrder == ColMajor) { + 
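// pload_realimag splats the real part of *b into br and the imaginary
// part into bi; below, a0*br is accumulated into c0 while the flipped,
// conjugated a1*bi is accumulated into c1, keeping the two halves of the
// complex product in separate accumulators until they are recombined
// after the accumulation loop.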
pload_realimag(b, br, bi); + } + else { + pload_realimag_row(b, br, bi); + } + if (ConjugateLhs && !ConjugateRhs) a0 = pconj2(a0); + LhsPacket a1 = pcplxflipconj(a0); + ScalarPacket cr = pmadd_complex_complex(a0.v, br, c0.v); + ScalarPacket ci = pmadd_complex_complex(a1.v, bi, c1.v); + c1 = ResPacket(ci); + c0 = PResPacket(cr); +} + +/** \internal core multiply operation for vectors - real times complex */ +template +EIGEN_ALWAYS_INLINE void gemv_mult_real_complex(LhsPacket& a0, RhsScalar* b, PResPacket& c0) +{ + ScalarPacket b0; + if (StorageOrder == ColMajor) { + b0 = pload_complex_full(b); + } + else { + b0 = pload_complex_full_row(b); + } + ScalarPacket cri = pmadd_complex_real(a0, b0, c0.v); + c0 = PResPacket(cri); +} + +/** \internal core multiply operation for vectors - complex times real */ +template +EIGEN_ALWAYS_INLINE void gemv_mult_complex_real(LhsPacket& a0, RhsScalar* b, PResPacket& c0) +{ + ScalarPacket a1 = pload_complex(&a0); + ScalarPacket b0; + if (StorageOrder == ColMajor) { + b0 = pload_real(b); + } + else { + b0 = pload_real_row(b); + } + ScalarPacket cri = pmadd_complex_real(a1, b0, c0.v); + c0 = PResPacket(cri); +} + +#define GEMV_MULT_COMPLEX_COMPLEX(LhsType, RhsType, ResType) \ +template \ +EIGEN_ALWAYS_INLINE void gemv_mult_complex(LhsType& a0, RhsType* b, ResType& c0, ResType& c1) \ +{ \ + gemv_mult_complex_complex(a0, b, c0, c1); \ +} + +GEMV_MULT_COMPLEX_COMPLEX(Packet2cf, std::complex, Packet2cf) +GEMV_MULT_COMPLEX_COMPLEX(Packet1cd, std::complex, Packet1cd) + +#define GEMV_MULT_REAL_COMPLEX(LhsType, RhsType, ResType) \ +template \ +EIGEN_ALWAYS_INLINE void gemv_mult_complex(LhsType& a0, RhsType* b, ResType& c0, RhsType&) \ +{ \ + gemv_mult_real_complex(a0, b, c0); \ +} + +GEMV_MULT_REAL_COMPLEX(float, std::complex, Packet2cf) +GEMV_MULT_REAL_COMPLEX(double, std::complex, Packet1cd) +GEMV_MULT_REAL_COMPLEX(Packet4f, std::complex, Packet2cf) +GEMV_MULT_REAL_COMPLEX(Packet2d, std::complex, Packet1cd) + +#define GEMV_MULT_COMPLEX_REAL(LhsType, RhsType, ResType1, ResType2) \ +template \ +EIGEN_ALWAYS_INLINE void gemv_mult_complex(LhsType& a0, RhsType* b, ResType1& c0, ResType2&) \ +{ \ + gemv_mult_complex_real(a0, b, c0); \ +} + +GEMV_MULT_COMPLEX_REAL(Packet2cf, float, Packet2cf, std::complex) +GEMV_MULT_COMPLEX_REAL(Packet1cd, double, Packet1cd, std::complex) +GEMV_MULT_COMPLEX_REAL(std::complex, float, Packet2cf, std::complex) +GEMV_MULT_COMPLEX_REAL(std::complex, double, Packet1cd, std::complex) + +#ifdef USE_GEMV_MMA +/** \internal convert packet to real form */ +template +EIGEN_ALWAYS_INLINE T convertReal(T a) +{ + return a; +} + +EIGEN_ALWAYS_INLINE Packet4f convertReal(Packet2cf a) +{ + return a.v; +} + +EIGEN_ALWAYS_INLINE Packet2d convertReal(Packet1cd a) +{ + return a.v; +} + +/** \internal convert packet to complex form */ +template +EIGEN_ALWAYS_INLINE T convertComplex(T a) +{ + return a; +} + +EIGEN_ALWAYS_INLINE Packet2cf convertComplex(Packet4f a) +{ + return Packet2cf(a); +} + +EIGEN_ALWAYS_INLINE Packet1cd convertComplex(Packet2d a) +{ + return Packet1cd(a); +} + +/** \internal load a vector from a complex location (for MMA version) */ +template +EIGEN_ALWAYS_INLINE void pload_complex_MMA(SLhsPacket& a) +{ + a = SLhsPacket(pload_complex(&a)); +} + +template +EIGEN_ALWAYS_INLINE void pload_complex_MMA(__vector_pair&) +{ + // Pass thru +} + +/** \internal perform a matrix multiply and accumulate (positive and negative) of packet a and packet b */ +template +EIGEN_ALWAYS_INLINE void pger_vecMMA(__vector_quad* acc, RhsPacket& a, LhsPacket& b) +{ + 
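// In the MMA GER mnemonics the suffix encodes the signs applied to the
// product and the accumulator: xvf32gerpp computes acc += a*b, while
// xvf32gernp computes acc += -(a*b), which realizes NegativeAccumulate
// without separately negating the inputs.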
if (NegativeAccumulate) + { + __builtin_mma_xvf32gernp(acc, (__vector unsigned char)a, (__vector unsigned char)b); + } + else { + __builtin_mma_xvf32gerpp(acc, (__vector unsigned char)a, (__vector unsigned char)b); + } +} + +/** \internal perform a matrix multiply and accumulate (positive and negative) of vector_pair a and packet b */ +template +EIGEN_ALWAYS_INLINE void pger_vecMMA(__vector_quad* acc, __vector_pair& a, Packet2d& b) +{ + if (NegativeAccumulate) + { + __builtin_mma_xvf64gernp(acc, (__vector_pair)a, (__vector unsigned char)b); + } + else { + __builtin_mma_xvf64gerpp(acc, (__vector_pair)a, (__vector unsigned char)b); + } +} + +template +EIGEN_ALWAYS_INLINE void pger_vecMMA(__vector_quad*, __vector_pair&, Packet4f&) +{ + // Just for compilation +} + +/** \internal madd for complex times complex (MMA version) */ +template +EIGEN_ALWAYS_INLINE void pmadd_complex_complex_MMA(LhsPacket& a, RealPacket& b, __vector_quad* c) +{ + if (ConjugateLhs && ConjugateRhs) { + RealPacket b2 = pconj2(convertComplex(b)).v; + return pger_vecMMA(c, b2, a.v); + } + else if (Negate && !ConjugateLhs && ConjugateRhs) { + return pger_vecMMA(c, b, a.v); + } + else { + return pger_vecMMA(c, b, a.v); + } +} + +template +EIGEN_ALWAYS_INLINE void pmadd_complex_complex_MMA(__vector_pair& a, RealPacket& b, __vector_quad* c) +{ + if (ConjugateLhs && ConjugateRhs) { + RealPacket b2 = pconj2(convertComplex(b)).v; + return pger_vecMMA(c, a, b2); + } + else if (Negate && !ConjugateLhs && ConjugateRhs) { + return pger_vecMMA(c, a, b); + } + else { + return pger_vecMMA(c, a, b); + } +} + +/** \internal madd for complex times real (MMA version) */ +template +EIGEN_ALWAYS_INLINE void pmadd_complex_real_MMA(LhsPacket& a, RealPacket& b, __vector_quad* c) +{ + RealPacket a2 = convertReal(a); + if (Conjugate) { + RealPacket b2 = pconj2(convertComplex(b)).v; + if (StorageOrder == ColMajor) { + return pger_vecMMA(c, b2, a2); + } else { + return pger_vecMMA(c, a2, b2); + } + } + else { + if (StorageOrder == ColMajor) { + return pger_vecMMA(c, b, a2); + } else { + return pger_vecMMA(c, a2, b); + } + } +} + +/** \internal madd for real times complex (MMA version) */ +template +EIGEN_ALWAYS_INLINE void pmadd_complex_real_MMA(__vector_pair& a, RealPacket& b, __vector_quad* c) +{ + if (Conjugate) { + RealPacket b2 = pconj2(convertComplex(b)).v; + return pger_vecMMA(c, a, b2); + } + else { + return pger_vecMMA(c, a, b); + } +} + +/** \internal core multiply operation for vectors (MMA version) - complex times complex */ +template +EIGEN_ALWAYS_INLINE void gemv_mult_complex_complex_MMA(SLhsPacket& a0, RhsScalar* b, __vector_quad* c0) +{ + ScalarPacket b0; + if (StorageOrder == ColMajor) { + b0 = pload_realimag_combine(b); + } else { + b0 = pload_realimag_combine_row(b); + } + pmadd_complex_complex_MMA(a0, b0, c0); +} + +/** \internal core multiply operation for vectors (MMA version) - complex times real */ +template +EIGEN_ALWAYS_INLINE void gemv_mult_complex_real_MMA(SLhsPacket& a0, RhsScalar* b, __vector_quad* c0) +{ + pload_complex_MMA(a0); + ScalarPacket b0; + if (StorageOrder == ColMajor) { + b0 = pload_real(b); + } + else { + b0 = pload_real_row(b); + } + pmadd_complex_real_MMA(a0, b0, c0); +} + +/** \internal core multiply operation for vectors (MMA version) - real times complex */ +template +EIGEN_ALWAYS_INLINE void gemv_mult_real_complex_MMA(SLhsPacket& a0, RhsScalar* b, __vector_quad* c0) +{ + ScalarPacket b0; + if (StorageOrder == ColMajor) { + b0 = pload_complex_full(b); + } + else { + b0 = pload_complex_full_row(b); + } 
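// At this point b0 holds one complex rhs value widened to a full real
// vector (real and imaginary lanes interleaved), so the single rank-1
// MMA update below multiplies the real lhs against both lanes at once.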
+ pmadd_complex_real_MMA)) ? StorageOrder : ColMajor>(a0, b0, c0); +} + +#define GEMV_MULT_COMPLEX_COMPLEX_MMA(LhsType, RhsType) \ +template \ +EIGEN_ALWAYS_INLINE void gemv_mult_complex_MMA(LhsType& a0, RhsType* b, __vector_quad* c0) \ +{ \ + gemv_mult_complex_complex_MMA(a0, b, c0); \ +} + +GEMV_MULT_COMPLEX_COMPLEX_MMA(Packet2cf, std::complex) +GEMV_MULT_COMPLEX_COMPLEX_MMA(__vector_pair, std::complex) +GEMV_MULT_COMPLEX_COMPLEX_MMA(Packet1cd, std::complex) + +/** \internal core multiply operation for vectors (MMA version) - complex times complex */ +template +EIGEN_ALWAYS_INLINE void gemv_mult_complex_MMA(__vector_pair& a0, std::complex* b, __vector_quad* c0) +{ + if (sizeof(LhsScalar) == 16) { + gemv_mult_complex_complex_MMA(a0, b, c0); + } + else { + gemv_mult_real_complex_MMA(a0, b, c0); + } +} + +#define GEMV_MULT_REAL_COMPLEX_MMA(LhsType, RhsType) \ +template \ +EIGEN_ALWAYS_INLINE void gemv_mult_complex_MMA(LhsType& a0, RhsType* b, __vector_quad* c0) \ +{ \ + gemv_mult_real_complex_MMA(a0, b, c0); \ +} + +GEMV_MULT_REAL_COMPLEX_MMA(Packet4f, std::complex) +GEMV_MULT_REAL_COMPLEX_MMA(Packet2d, std::complex) + +#define GEMV_MULT_COMPLEX_REAL_MMA(LhsType, RhsType) \ +template \ +EIGEN_ALWAYS_INLINE void gemv_mult_complex_MMA(LhsType& a0, RhsType* b, __vector_quad* c0) \ +{ \ + gemv_mult_complex_real_MMA(a0, b, c0); \ +} + +GEMV_MULT_COMPLEX_REAL_MMA(Packet2cf, float) +GEMV_MULT_COMPLEX_REAL_MMA(Packet1cd, double) +GEMV_MULT_COMPLEX_REAL_MMA(__vector_pair, float) +GEMV_MULT_COMPLEX_REAL_MMA(__vector_pair, double) + +/** \internal disassemble MMA accumulator results into packets */ +template +EIGEN_ALWAYS_INLINE void disassembleResults2(__vector_quad* c0, PacketBlock& result0) +{ + __builtin_mma_disassemble_acc(&result0.packet, c0); + if (sizeof(LhsPacket) == 16) { + if (sizeof(RhsPacket) == 16) { + ScalarPacket tmp0, tmp2; + tmp2 = vec_mergeh(result0.packet[2], result0.packet[3]); + tmp0 = vec_mergeh(result0.packet[0], result0.packet[1]); + result0.packet[3] = vec_mergel(result0.packet[3], result0.packet[2]); + result0.packet[1] = vec_mergel(result0.packet[1], result0.packet[0]); + result0.packet[2] = tmp2; + result0.packet[0] = tmp0; + + if (ConjugateLhs) { + result0.packet[0] = pconj2(convertComplex(result0.packet[0])).v; + result0.packet[2] = pconj2(convertComplex(result0.packet[2])).v; + } else if (ConjugateRhs) { + result0.packet[1] = pconj2(convertComplex(result0.packet[1])).v; + result0.packet[3] = pconj2(convertComplex(result0.packet[3])).v; + } else { + result0.packet[1] = pconjinv(convertComplex(result0.packet[1])).v; + result0.packet[3] = pconjinv(convertComplex(result0.packet[3])).v; + } + result0.packet[0] = vec_add(result0.packet[0], result0.packet[1]); + result0.packet[2] = vec_add(result0.packet[2], result0.packet[3]); + } else { + result0.packet[0][1] = result0.packet[1][1]; + result0.packet[2][1] = result0.packet[3][1]; + } + } +} + +template +EIGEN_ALWAYS_INLINE void disassembleResults4(__vector_quad* c0, PacketBlock& result0) +{ + __builtin_mma_disassemble_acc(&result0.packet, c0); + if (GEMV_IS_COMPLEX_COMPLEX) { + if (ConjugateLhs) { + result0.packet[0] = pconj2(convertComplex(result0.packet[0])).v; + result0.packet[1] = pcplxflip2(convertComplex(result0.packet[1])).v; + } else { + if (ConjugateRhs) { + result0.packet[1] = pcplxconjflip(convertComplex(result0.packet[1])).v; + } else { + result0.packet[1] = pcplxflipconj(convertComplex(result0.packet[1])).v; + } + } + result0.packet[0] = vec_add(result0.packet[0], result0.packet[1]); + } else if 
(sizeof(LhsPacket) == sizeof(std::complex)) { + if (ConjugateLhs) { + result0.packet[0] = pconj2(convertComplex(result0.packet[0])).v; + } + } else { + result0.packet[0] = vec_mergee(result0.packet[0], result0.packet[1]); + } +} + +template +EIGEN_ALWAYS_INLINE void disassembleResults(__vector_quad* c0, PacketBlock& result0) +{ + if (!GEMV_IS_COMPLEX_FLOAT) { + disassembleResults2(c0, result0); + } else { + disassembleResults4(c0, result0); + } +} +#endif + +#define GEMV_GETN_COMPLEX(N) (((N) * ResPacketSize) >> 1) + +#define GEMV_LOADPACKET_COL_COMPLEX(iter) \ + loadLhsPacket(lhs, i + ((iter) * ResPacketSize), j) + +#define GEMV_LOADPACKET_COL_COMPLEX_DATA(iter) \ + convertReal(GEMV_LOADPACKET_COL_COMPLEX(iter)) + +#ifdef USE_GEMV_MMA +#define GEMV_INIT_COL_COMPLEX_MMA(iter, N) \ + if (GEMV_GETN_COMPLEX(N) > iter) { \ + __builtin_mma_xxsetaccz(&e0##iter); \ + } + +#if EIGEN_COMP_LLVM +#define GEMV_LOADPAIR_COL_COMPLEX_MMA(iter1, iter2) \ + GEMV_BUILDPAIR_MMA(a##iter1, GEMV_LOADPACKET_COL_COMPLEX_DATA(iter2), GEMV_LOADPACKET_COL_COMPLEX_DATA((iter2) + 1)); \ + EIGEN_UNUSED_VARIABLE(f##iter1); +#else +#define GEMV_LOADPAIR_COL_COMPLEX_MMA(iter1, iter2) \ + if (sizeof(LhsPacket) == 16) { \ + const LhsScalar& src = lhs(i + ((32 * iter1) / sizeof(LhsScalar)), j); \ + a##iter1 = *reinterpret_cast<__vector_pair *>(const_cast(&src)); \ + EIGEN_UNUSED_VARIABLE(f##iter1); \ + } else { \ + f##iter1 = lhs.template load(i + ((iter2) * ResPacketSize), j); \ + GEMV_BUILDPAIR_MMA(a##iter1, vec_splat(convertReal(f##iter1), 0), vec_splat(convertReal(f##iter1), 1)); \ + } +#endif + +#define GEMV_LOAD1_COL_COMPLEX_MMA(iter, N) \ + if (GEMV_GETN_COMPLEX(N) > iter) { \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + f##iter = GEMV_LOADPACKET_COL_COMPLEX(iter); \ + EIGEN_UNUSED_VARIABLE(a##iter); \ + } else { \ + GEMV_LOADPAIR_COL_COMPLEX_MMA(iter, iter << 1) \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(a##iter); \ + EIGEN_UNUSED_VARIABLE(f##iter); \ + } + +#define GEMV_WORK1_COL_COMPLEX_MMA(iter, N) \ + if (GEMV_GETN_COMPLEX(N) > iter) { \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + gemv_mult_complex_MMA(f##iter, b, &e0##iter); \ + } else { \ + gemv_mult_complex_MMA(a##iter, b, &e0##iter); \ + } \ + } + +#define GEMV_LOADPAIR2_COL_COMPLEX_MMA(iter1, iter2) \ + GEMV_BUILDPAIR_MMA(a##iter1, GEMV_LOADPACKET_COL_COMPLEX_DATA(iter2), GEMV_LOADPACKET_COL_COMPLEX_DATA((iter2) + 1)); + +#define GEMV_LOAD2_COL_COMPLEX_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN_COMPLEX(N) > iter1) { \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + GEMV_LOADPAIR2_COL_COMPLEX_MMA(iter2, iter2); \ + EIGEN_UNUSED_VARIABLE(a##iter3) \ + } else { \ + GEMV_LOADPAIR2_COL_COMPLEX_MMA(iter2, iter2 << 1); \ + GEMV_LOADPAIR2_COL_COMPLEX_MMA(iter3, iter3 << 1); \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(a##iter2); \ + EIGEN_UNUSED_VARIABLE(a##iter3); \ + } \ + EIGEN_UNUSED_VARIABLE(f##iter2); \ + EIGEN_UNUSED_VARIABLE(f##iter3); + +#define GEMV_WORK2_COL_COMPLEX_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN_COMPLEX(N) > iter1) { \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + PLhsPacket g[2]; \ + __builtin_vsx_disassemble_pair(reinterpret_cast(g), &a##iter2); \ + gemv_mult_complex_MMA(g[0], b, &e0##iter2); \ + gemv_mult_complex_MMA(g[1], b, &e0##iter3); \ + } else { \ + gemv_mult_complex_MMA(a##iter2, b, &e0##iter2); \ + gemv_mult_complex_MMA(a##iter3, b, &e0##iter3); \ + } \ + } + +#if EIGEN_COMP_LLVM +#define GEMV_LOAD_COL_COMPLEX_MMA(N) \ + if (GEMV_GETN_COMPLEX(N) > 1) { \ + GEMV_UNROLL_HALF(GEMV_LOAD2_COL_COMPLEX_MMA, (N >> 1)) \ + } else { \ + GEMV_UNROLL(GEMV_LOAD1_COL_COMPLEX_MMA, 
N) \ + } + +#define GEMV_WORK_COL_COMPLEX_MMA(N) \ + if (GEMV_GETN_COMPLEX(N) > 1) { \ + GEMV_UNROLL_HALF(GEMV_WORK2_COL_COMPLEX_MMA, (N >> 1)) \ + } else { \ + GEMV_UNROLL(GEMV_WORK1_COL_COMPLEX_MMA, N) \ + } +#else +#define GEMV_LOAD_COL_COMPLEX_MMA(N) \ + GEMV_UNROLL(GEMV_LOAD1_COL_COMPLEX_MMA, N) + +#define GEMV_WORK_COL_COMPLEX_MMA(N) \ + GEMV_UNROLL(GEMV_WORK1_COL_COMPLEX_MMA, N) +#endif + +#define GEMV_DISASSEMBLE_COMPLEX_MMA(iter) \ + disassembleResults(&e0##iter, result0##iter); + +#define GEMV_STORE_COL_COMPLEX_MMA(iter, N) \ + if (GEMV_GETN_COMPLEX(N) > iter) { \ + GEMV_DISASSEMBLE_COMPLEX_MMA(iter); \ + c0##iter = PResPacket(result0##iter.packet[0]); \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + pstoreu_pmadd_complex(c0##iter, alpha_data, res + i + (iter * ResPacketSize)); \ + } else { \ + pstoreu_pmadd_complex(c0##iter, alpha_data, res + i + ((iter << 1) * ResPacketSize)); \ + c0##iter = PResPacket(result0##iter.packet[2]); \ + pstoreu_pmadd_complex(c0##iter, alpha_data, res + i + (((iter << 1) + 1) * ResPacketSize)); \ + } \ + } + +#define GEMV_STORE2_COL_COMPLEX_MMA(iter1, iter2, iter3, N) \ + if (GEMV_GETN_COMPLEX(N) > iter1) { \ + GEMV_DISASSEMBLE_COMPLEX_MMA(iter2); \ + GEMV_DISASSEMBLE_COMPLEX_MMA(iter3); \ + c0##iter2 = PResPacket(result0##iter2.packet[0]); \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + c0##iter3 = PResPacket(result0##iter3.packet[0]); \ + pstoreu_pmadd_complex(c0##iter2, c0##iter3, alpha_data, res + i); \ + } else { \ + c0##iter3 = PResPacket(result0##iter2.packet[2]); \ + pstoreu_pmadd_complex(c0##iter2, c0##iter3, alpha_data, res + i); \ + c0##iter2 = PResPacket(result0##iter3.packet[0]); \ + c0##iter3 = PResPacket(result0##iter3.packet[2]); \ + pstoreu_pmadd_complex(c0##iter2, c0##iter3, alpha_data, res + i); \ + } \ + } + +#define GEMV_PROCESS_COL_COMPLEX_ONE_MMA(N) \ + GEMV_UNROLL(GEMV_INIT_COL_COMPLEX_MMA, N) \ + Index j = j2; \ + do { \ + const RhsScalar& b1 = rhs2(j, 0); \ + RhsScalar* b = const_cast(&b1); \ + GEMV_UNROLL(GEMV_PREFETCH, N) \ + GEMV_LOAD_COL_COMPLEX_MMA(N) \ + GEMV_WORK_COL_COMPLEX_MMA(N) \ + } while (++j < jend); \ + if (GEMV_GETN(N) <= 2) { \ + GEMV_UNROLL(GEMV_STORE_COL_COMPLEX_MMA, N) \ + } else { \ + GEMV_UNROLL_HALF(GEMV_STORE2_COL_COMPLEX_MMA, (N >> 1)) \ + } \ + i += (ResPacketSize * N); +#endif + +#define GEMV_INIT_COMPLEX(iter, N) \ + if (N > iter) { \ + c0##iter = pset_zero(); \ + c1##iter = pset_init(c1##iter); \ + } else { \ + EIGEN_UNUSED_VARIABLE(c0##iter); \ + EIGEN_UNUSED_VARIABLE(c1##iter); \ + } + +#define GEMV_WORK_COL_COMPLEX(iter, N) \ + if (N > iter) { \ + f##iter = GEMV_LOADPACKET_COL_COMPLEX(iter); \ + gemv_mult_complex(f##iter, b, c0##iter, c1##iter); \ + } else { \ + EIGEN_UNUSED_VARIABLE(f##iter); \ + } + +#define GEMV_STORE_COL_COMPLEX(iter, N) \ + if (N > iter) { \ + if (GEMV_IS_COMPLEX_COMPLEX) { \ + c0##iter = padd(c0##iter, c1##iter); \ + } \ + pstoreu_pmadd_complex(c0##iter, alpha_data, res + i + (iter * ResPacketSize)); \ + } + +/** \internal main macro for gemv_complex_col - initialize accumulators, multiply and add inputs, and store results */ +#define GEMV_PROCESS_COL_COMPLEX_ONE(N) \ + GEMV_UNROLL(GEMV_INIT_COMPLEX, N) \ + Index j = j2; \ + do { \ + const RhsScalar& b1 = rhs2(j, 0); \ + RhsScalar* b = const_cast(&b1); \ + GEMV_UNROLL(GEMV_PREFETCH, N) \ + GEMV_UNROLL(GEMV_WORK_COL_COMPLEX, N) \ + } while (++j < jend); \ + GEMV_UNROLL(GEMV_STORE_COL_COMPLEX, N) \ + i += (ResPacketSize * N); + +#if defined(USE_GEMV_MMA) && (EIGEN_COMP_LLVM || defined(USE_SLOWER_GEMV_MMA)) +#define USE_GEMV_COL_COMPLEX_MMA 
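Stripped of packet types and unrolling macros, `GEMV_PROCESS_COL_COMPLEX_ONE(_MMA)` above follows the classic column-major GEMV schedule: a block of result rows stays live in accumulators while a block of columns is swept, and memory is written once per block. A scalar stand-in for that control flow (illustrative only, fixed block of 8 rows):

```cpp
#include <cstddef>

void gemv_col_block(const float* lhs, std::size_t lhsStride, const float* rhs,
                    float* res, float alpha, std::size_t i0,
                    std::size_t j2, std::size_t jend) {
  float c[8] = {0.0f};                           // GEMV_INIT_*: accumulators stay in registers
  for (std::size_t j = j2; j < jend; ++j) {      // the do { ... } while (++j < jend) sweep
    float b = rhs[j];
    for (int k = 0; k < 8; ++k)
      c[k] += lhs[(i0 + k) + j * lhsStride] * b; // GEMV_WORK_*: multiply-accumulate
  }
  for (int k = 0; k < 8; ++k)
    res[i0 + k] += alpha * c[k];                 // GEMV_STORE_*: one store pass per block
}
```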
+#endif + +#ifdef USE_GEMV_COL_COMPLEX_MMA +#define GEMV_PROCESS_COL_COMPLEX(N) \ + GEMV_PROCESS_COL_COMPLEX_ONE_MMA(N) +#else +#if defined(USE_GEMV_MMA) && (__GNUC__ > 10) +#define GEMV_PROCESS_COL_COMPLEX(N) \ + if (sizeof(Scalar) != sizeof(LhsPacket)) { \ + GEMV_PROCESS_COL_COMPLEX_ONE_MMA(N) \ + } else { \ + GEMV_PROCESS_COL_COMPLEX_ONE(N) \ + } +#else +#define GEMV_PROCESS_COL_COMPLEX(N) \ + GEMV_PROCESS_COL_COMPLEX_ONE(N) +#endif +#endif + +template +EIGEN_STRONG_INLINE void gemv_complex_col( + Index rows, Index cols, + const LhsMapper& alhs, + const RhsMapper& rhs, + ResScalar* res, Index resIncr, + ResScalar alpha) +{ + typedef gemv_traits Traits; + + typedef typename Traits::LhsPacket LhsPacket; + typedef typename Traits::RhsPacket RhsPacket; + typedef typename Traits::ResPacket ResPacket; + + typedef typename packet_traits::type ScalarPacket; + typedef typename packet_traits::type PLhsPacket; + typedef typename packet_traits::type PResPacket; + typedef gemv_traits PTraits; + + EIGEN_UNUSED_VARIABLE(resIncr); + eigen_internal_assert(resIncr == 1); + + // The following copy tells the compiler that lhs's attributes are not modified outside this function + // This helps GCC to generate proper code. + LhsMapper lhs(alhs); + RhsMapper rhs2(rhs); + + conj_helper cj; + + const Index lhsStride = lhs.stride(); + // TODO: for padded aligned inputs, we could enable aligned reads + enum { + LhsAlignment = Unaligned, + ResPacketSize = PTraits::ResPacketSize, + LhsPacketSize = PTraits::LhsPacketSize, + RhsPacketSize = PTraits::RhsPacketSize, + }; +#ifdef EIGEN_POWER_USE_GEMV_PREFETCH + const Index prefetch_dist = 64 * LhsPacketSize; +#endif + +#ifndef GCC_ONE_VECTORPAIR_BUG + const Index n8 = rows - 8 * ResPacketSize + 1; + const Index n4 = rows - 4 * ResPacketSize + 1; + const Index n2 = rows - 2 * ResPacketSize + 1; +#endif + const Index n1 = rows - 1 * ResPacketSize + 1; + + // TODO: improve the following heuristic: + const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 
16 : 8); + + typedef alpha_store AlphaData; + AlphaData alpha_data(alpha); + + for (Index j2 = 0; j2 < cols; j2 += block_cols) + { + Index jend = numext::mini(j2 + block_cols, cols); + Index i = 0; + PResPacket c00, c01, c02, c03, c04, c05, c06, c07; + ResPacket c10, c11, c12, c13, c14, c15, c16, c17; + PLhsPacket f0, f1, f2, f3, f4, f5, f6, f7; +#ifdef USE_GEMV_MMA + __vector_quad e00, e01, e02, e03, e04, e05, e06, e07; + __vector_pair a0, a1, a2, a3, a4, a5, a6, a7; + PacketBlock result00, result01, result02, result03, result04, result05, result06, result07; + GEMV_UNUSED(8, e0) + GEMV_UNUSED(8, result0) + GEMV_UNUSED(8, a) + GEMV_UNUSED(8, f) +#if !defined(GCC_ONE_VECTORPAIR_BUG) && defined(USE_GEMV_COL_COMPLEX_MMA) + if (GEMV_IS_COMPLEX_COMPLEX || !GEMV_IS_COMPLEX_FLOAT) +#endif +#endif +#ifndef GCC_ONE_VECTORPAIR_BUG + { + while (i < n8) + { + GEMV_PROCESS_COL_COMPLEX(8) + } + } + while (i < n4) + { + GEMV_PROCESS_COL_COMPLEX(4) + } + if (i < n2) + { + GEMV_PROCESS_COL_COMPLEX(2) + } + if (i < n1) +#else + while (i < n1) +#endif + { + GEMV_PROCESS_COL_COMPLEX_ONE(1) + } + for (;i < rows;++i) + { + ResScalar d0(0); + Index j = j2; + do { + d0 += cj.pmul(lhs(i, j), rhs2(j, 0)); + } while (++j < jend); + res[i] += alpha * d0; + } + } +} + +template struct ScalarBlock { + Scalar scalar[N]; +}; + +#ifdef USE_GEMV_MMA +static Packet16uc p16uc_ELEMENT_3 = { 0x0c,0x0d,0x0e,0x0f, 0x1c,0x1d,0x1e,0x1f, 0x0c,0x0d,0x0e,0x0f, 0x1c,0x1d,0x1e,0x1f }; + +/** \internal predux (add elements of a vector) from a MMA accumulator - real results */ +template +EIGEN_ALWAYS_INLINE ScalarBlock predux_real(__vector_quad* acc0, __vector_quad* acc1) +{ + PacketBlock result0, result1; + __builtin_mma_disassemble_acc(&result0.packet, acc0); + __builtin_mma_disassemble_acc(&result1.packet, acc1); + result0.packet[0] = vec_mergeh(result0.packet[0], result1.packet[0]); + result0.packet[1] = vec_mergeo(result0.packet[1], result1.packet[1]); + result0.packet[2] = vec_mergel(result0.packet[2], result1.packet[2]); + result0.packet[3] = vec_perm(result0.packet[3], result1.packet[3], p16uc_ELEMENT_3); + result0.packet[0] = vec_add(vec_add(result0.packet[0], result0.packet[2]), vec_add(result0.packet[1], result0.packet[3])); + return *reinterpret_cast *>(&result0.packet[0]); +} + +template<> +EIGEN_ALWAYS_INLINE ScalarBlock predux_real(__vector_quad* acc0, __vector_quad* acc1) +{ + PacketBlock result0, result1; + __builtin_mma_disassemble_acc(&result0.packet, acc0); + __builtin_mma_disassemble_acc(&result1.packet, acc1); + result0.packet[0] = vec_add(vec_mergeh(result0.packet[0], result1.packet[0]), vec_mergel(result0.packet[1], result1.packet[1])); + return *reinterpret_cast *>(&result0.packet[0]); +} + +/** \internal add complex results together */ +template +EIGEN_ALWAYS_INLINE ScalarBlock, 2> addComplexResults(PacketBlock& result0, PacketBlock& result1) +{ + ScalarBlock, 2> cc0; + result0.packet[0] = reinterpret_cast(vec_mergeh(reinterpret_cast(result0.packet[0]), reinterpret_cast(result1.packet[0]))); + result0.packet[2] = reinterpret_cast(vec_mergel(reinterpret_cast(result0.packet[2]), reinterpret_cast(result1.packet[2]))); + result0.packet[0] = vec_add(result0.packet[0], result0.packet[2]); + if (GEMV_IS_COMPLEX_COMPLEX) { + result0.packet[1] = reinterpret_cast(vec_mergeh(reinterpret_cast(result0.packet[1]), reinterpret_cast(result1.packet[1]))); + result0.packet[3] = reinterpret_cast(vec_mergel(reinterpret_cast(result0.packet[3]), reinterpret_cast(result1.packet[3]))); + result0.packet[1] = vec_add(result0.packet[1], 
result0.packet[3]); + if (ConjugateLhs) { + result0.packet[0] = pconj2(convertComplex(result0.packet[0])).v; + result0.packet[1] = pcplxflip2(convertComplex(result0.packet[1])).v; + } else if (ConjugateRhs) { + result0.packet[1] = pcplxconjflip(convertComplex(result0.packet[1])).v; + } else { + result0.packet[1] = pcplxflipconj(convertComplex(result0.packet[1])).v; + } + result0.packet[0] = vec_add(result0.packet[0], result0.packet[1]); + } else { + if (ConjugateLhs && (sizeof(LhsPacket) == sizeof(std::complex))) { + result0.packet[0] = pconj2(convertComplex(result0.packet[0])).v; + } + } + cc0.scalar[0].real(result0.packet[0][0]); + cc0.scalar[0].imag(result0.packet[0][1]); + cc0.scalar[1].real(result0.packet[0][2]); + cc0.scalar[1].imag(result0.packet[0][3]); + return cc0; +} + +template +EIGEN_ALWAYS_INLINE ScalarBlock, 2> addComplexResults(PacketBlock&, PacketBlock&) +{ + ScalarBlock, 2> cc0; + EIGEN_UNUSED_VARIABLE(cc0); + return cc0; // Just for compilation +} + +/** \internal predux (add elements of a vector) from a MMA accumulator - complex results */ +template +EIGEN_ALWAYS_INLINE ScalarBlock predux_complex(__vector_quad* acc0, __vector_quad* acc1) +{ + PacketBlock result0, result1; + __builtin_mma_disassemble_acc(&result0.packet, acc0); + __builtin_mma_disassemble_acc(&result1.packet, acc1); + return addComplexResults(result0, result1); +} + +template +EIGEN_ALWAYS_INLINE ScalarBlock predux_real(__vector_quad* acc0) +{ + PacketBlock result0; + __builtin_mma_disassemble_acc(&result0.packet, acc0); + result0.packet[0] = vec_add(vec_mergeh(result0.packet[0], result0.packet[2]), vec_mergel(result0.packet[1], result0.packet[3])); + return *reinterpret_cast *>(&result0.packet[0]); +} + +template +EIGEN_ALWAYS_INLINE ScalarBlock predux_complex(__vector_quad* acc0) +{ + ScalarBlock cc0; + PacketBlock result0; + __builtin_mma_disassemble_acc(&result0.packet, acc0); + if (GEMV_IS_COMPLEX_COMPLEX) { + if (ConjugateLhs) { + result0.packet[1] = pconjinv(convertComplex(result0.packet[1])).v; + result0.packet[3] = pconjinv(convertComplex(result0.packet[3])).v; + } else if (ConjugateRhs) { + result0.packet[0] = pconj2(convertComplex(result0.packet[0])).v; + result0.packet[2] = pconj2(convertComplex(result0.packet[2])).v; + } else { + result0.packet[1] = pconj2(convertComplex(result0.packet[1])).v; + result0.packet[3] = pconj2(convertComplex(result0.packet[3])).v; + } + result0.packet[0] = vec_add(result0.packet[0], __builtin_vsx_xxpermdi(result0.packet[1], result0.packet[1], 2)); + result0.packet[2] = vec_add(result0.packet[2], __builtin_vsx_xxpermdi(result0.packet[3], result0.packet[3], 2)); + } else { + result0.packet[0] = __builtin_vsx_xxpermdi(result0.packet[0], result0.packet[1], 1); + result0.packet[2] = __builtin_vsx_xxpermdi(result0.packet[2], result0.packet[3], 1); + } + cc0.scalar[0].real(result0.packet[0][0]); + cc0.scalar[0].imag(result0.packet[0][1]); + cc0.scalar[1].real(result0.packet[2][0]); + cc0.scalar[1].imag(result0.packet[2][1]); + return cc0; +} +#endif + +template +EIGEN_ALWAYS_INLINE ScalarBlock predux_real(ResPacket& a, ResPacket& b) +{ + ScalarBlock cc0; + cc0.scalar[0] = predux(a); + cc0.scalar[1] = predux(b); + return cc0; +} + +template +EIGEN_ALWAYS_INLINE ScalarBlock predux_complex(ResPacket& a, ResPacket& b) +{ + return predux_real(a, b); +} + +#define GEMV_UNROLL_ROW(func, N) \ + func(0, N) func(1, N) func(2, N) func(3, N) func(4, N) func(5, N) func(6, N) func(7, N) + +#define GEMV_UNROLL_ROW_HALF(func, N) \ + func(0, 0, 1, N) func(1, 2, 3, N) func(2, 4, 5, N) 
func(3, 6, 7, N) + +#define GEMV_LOADPACKET_ROW(iter) \ + lhs.template load(i + (iter), j) + +#ifdef USE_GEMV_MMA +#define GEMV_UNROLL3_ROW(func, N, which) \ + func(0, N, which) func(1, N, which) func(2, N, which) func(3, N, which) \ + func(4, N, which) func(5, N, which) func(6, N, which) func(7, N, which) + +#define GEMV_UNUSED_ROW(N, which) \ + GEMV_UNROLL3_ROW(GEMV_UNUSED_VAR, N, which) + +#define GEMV_INIT_ROW(iter, N) \ + if (GEMV_GETN(N) > iter) { \ + __builtin_mma_xxsetaccz(&c##iter); \ + } + +#define GEMV_LOADPAIR_ROW(iter1, iter2) \ + GEMV_BUILDPAIR_MMA(b##iter1, GEMV_LOADPACKET_ROW(iter2), GEMV_LOADPACKET_ROW((iter2) + 1)); + +#define GEMV_WORK_ROW(iter, N) \ + if (GEMV_GETN(N) > iter) { \ + if (GEMV_IS_FLOAT) { \ + pger_vecMMA_acc(&c##iter, a0, GEMV_LOADPACKET_ROW(iter)); \ + } else { \ + __vector_pair b##iter; \ + GEMV_LOADPAIR_ROW(iter, iter << 1) \ + pger_vecMMA_acc(&c##iter, b##iter, a0); \ + } \ + } + +#define GEMV_PREDUX2(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + if (GEMV_IS_FLOAT) { \ + cc##iter1 = predux_real(&c##iter2, &c##iter3); \ + } else { \ + cc##iter1 = predux_real(&c##iter1); \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(cc##iter1); \ + } +#else +#define GEMV_INIT_ROW(iter, N) \ + if (N > iter) { \ + c##iter = pset1(ResScalar(0)); \ + } else { \ + EIGEN_UNUSED_VARIABLE(c##iter); \ + } + +#define GEMV_WORK_ROW(iter, N) \ + if (N > iter) { \ + c##iter = pcj.pmadd(GEMV_LOADPACKET_ROW(iter), a0, c##iter); \ + } + +#define GEMV_PREDUX2(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + cc##iter1 = predux_real(c##iter2, c##iter3); \ + } else { \ + EIGEN_UNUSED_VARIABLE(cc##iter1); \ + } +#endif + +#define GEMV_MULT(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + cc##iter1.scalar[0] += cj.pmul(lhs(i + iter2, j), a0); \ + cc##iter1.scalar[1] += cj.pmul(lhs(i + iter3, j), a0); \ + } + +#define GEMV_STORE_ROW(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + storeMaddData(res + ((i + iter2) * resIncr), alpha, cc##iter1.scalar[0]); \ + storeMaddData(res + ((i + iter3) * resIncr), alpha, cc##iter1.scalar[1]); \ + } + +/** \internal main macro for gemv_row - initialize accumulators, multiply and add inputs, predux and store results */ +#define GEMV_PROCESS_ROW(N) \ + for (; i < n##N; i += N) { \ + GEMV_UNROLL_ROW(GEMV_INIT_ROW, N) \ + Index j = 0; \ + for (; j + LhsPacketSize <= cols; j += LhsPacketSize) { \ + RhsPacket a0 = rhs2.template load(j); \ + GEMV_UNROLL_ROW(GEMV_WORK_ROW, N) \ + } \ + GEMV_UNROLL_ROW_HALF(GEMV_PREDUX2, (N >> 1)) \ + for (; j < cols; ++j) { \ + RhsScalar a0 = rhs2(j); \ + GEMV_UNROLL_ROW_HALF(GEMV_MULT, (N >> 1)) \ + } \ + GEMV_UNROLL_ROW_HALF(GEMV_STORE_ROW, (N >> 1)) \ + } + +template +EIGEN_STRONG_INLINE void gemv_row( + Index rows, Index cols, + const LhsMapper& alhs, + const RhsMapper& rhs, + ResScalar* res, Index resIncr, + ResScalar alpha) +{ + typedef gemv_traits Traits; + + typedef typename Traits::LhsPacket LhsPacket; + typedef typename Traits::RhsPacket RhsPacket; + typedef typename Traits::ResPacket ResPacket; + + // The following copy tells the compiler that lhs's attributes are not modified outside this function + // This helps GCC to generate proper code. + LhsMapper lhs(alhs); + typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0); + + eigen_internal_assert(rhs.stride() == 1); + conj_helper cj; + conj_helper pcj; + + // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large, + // processing 8 rows at once might be counter productive wrt cache. 
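In scalar terms the row kernel that follows is a plain dot product per row: each of the N rows keeps its own SIMD partial sum, which `predux` reduces to a scalar only once, after the column loop. A reference version, illustrative only:

```cpp
#include <cstddef>

void gemv_row_ref(std::size_t rows, std::size_t cols, const float* lhs,
                  std::size_t lhsStride, const float* rhs, float* res,
                  std::size_t resIncr, float alpha) {
  for (std::size_t i = 0; i < rows; ++i) {
    float d0 = 0.0f;                             // per-row accumulator (ResPacket or __vector_quad)
    for (std::size_t j = 0; j < cols; ++j)
      d0 += lhs[i * lhsStride + j] * rhs[j];     // GEMV_WORK_ROW: pcj.pmadd / pger_vecMMA_acc
    res[i * resIncr] += alpha * d0;              // predux, then storeMaddData
  }
}
```

The `n8`/`n4`/`n2` bounds below simply peel this loop so that 8, 4, or 2 such accumulators are in flight at once.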
+#ifndef GCC_ONE_VECTORPAIR_BUG + const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7); + const Index n4 = rows - 3; + const Index n2 = rows - 1; +#endif + + // TODO: for padded aligned inputs, we could enable aligned reads + enum { + LhsAlignment = Unaligned, + ResPacketSize = Traits::ResPacketSize, + LhsPacketSize = Traits::LhsPacketSize, + RhsPacketSize = Traits::RhsPacketSize, + }; + + Index i = 0; +#ifdef USE_GEMV_MMA + __vector_quad c0, c1, c2, c3, c4, c5, c6, c7; + GEMV_UNUSED_ROW(8, c) +#else + ResPacket c0, c1, c2, c3, c4, c5, c6, c7; +#endif +#ifndef GCC_ONE_VECTORPAIR_BUG + ScalarBlock cc0, cc1, cc2, cc3; + GEMV_PROCESS_ROW(8) + GEMV_PROCESS_ROW(4) + GEMV_PROCESS_ROW(2) +#endif + for (; i < rows; ++i) + { + ResPacket d0 = pset1(ResScalar(0)); + Index j = 0; + for (; j + LhsPacketSize <= cols; j += LhsPacketSize) + { + RhsPacket b0 = rhs2.template load(j); + + d0 = pcj.pmadd(lhs.template load(i + 0, j), b0, d0); + } + ResScalar dd0 = predux(d0); + for (; j < cols; ++j) + { + dd0 += cj.pmul(lhs(i, j), rhs2(j)); + } + res[i * resIncr] += alpha * dd0; + } +} + +#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL(Scalar) \ +template \ +struct general_matrix_vector_product \ +{ \ + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; \ +\ + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run( \ + Index rows, Index cols, \ + const LhsMapper& lhs, \ + const RhsMapper& rhs, \ + ResScalar* res, Index resIncr, \ + ResScalar alpha) { \ + gemv_col(rows, cols, lhs, rhs, res, resIncr, alpha); \ + } \ +}; + +#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW(Scalar) \ +template \ +struct general_matrix_vector_product \ +{ \ + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; \ +\ + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run( \ + Index rows, Index cols, \ + const LhsMapper& lhs, \ + const RhsMapper& rhs, \ + ResScalar* res, Index resIncr, \ + ResScalar alpha) { \ + gemv_row(rows, cols, lhs, rhs, res, resIncr, alpha); \ + } \ +}; + +EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL(float) +EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL(double) +EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW(float) +EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW(double) + +template +EIGEN_ALWAYS_INLINE ScalarBlock predux_complex(PResPacket& a0, PResPacket& b0, ResPacket& a1, ResPacket& b1) +{ + if (GEMV_IS_COMPLEX_COMPLEX) { + a0 = padd(a0, a1); + b0 = padd(b0, b1); + } + return predux_complex(a0, b0); +} + +#define GEMV_LOADPACKET_ROW_COMPLEX(iter) \ + loadLhsPacket(lhs, i + (iter), j) + +#define GEMV_LOADPACKET_ROW_COMPLEX_DATA(iter) \ + convertReal(GEMV_LOADPACKET_ROW_COMPLEX(iter)) + +#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK(which, N) \ + j = 0; \ + for (; j + LhsPacketSize <= cols; j += LhsPacketSize) { \ + const RhsScalar& b1 = rhs2(j); \ + RhsScalar* b = const_cast(&b1); \ + GEMV_UNROLL_ROW(which, N) \ + } + +#define GEMV_PROCESS_END_ROW_COMPLEX(N) \ + for (; j < cols; ++j) { \ + RhsScalar b0 = rhs2(j); \ + GEMV_UNROLL_ROW_HALF(GEMV_MULT_COMPLEX, (N >> 1)) \ + } \ + GEMV_UNROLL_ROW_HALF(GEMV_STORE_ROW_COMPLEX, (N >> 1)) + +#ifdef USE_GEMV_MMA +#define GEMV_INIT_ROW_COMPLEX_MMA(iter, N) \ + if (GEMV_GETN_COMPLEX(N) > iter) { \ + __builtin_mma_xxsetaccz(&e0##iter); \ + } + +#define GEMV_LOADPAIR_ROW_COMPLEX_MMA(iter1, iter2) \ + GEMV_BUILDPAIR_MMA(a##iter1, GEMV_LOADPACKET_ROW_COMPLEX_DATA(iter2), GEMV_LOADPACKET_ROW_COMPLEX_DATA((iter2) + 1)); + +#define GEMV_WORK_ROW_COMPLEX_MMA(iter, N) \ + if (GEMV_GETN_COMPLEX(N) > iter) { \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + PLhsPacket a##iter = 
GEMV_LOADPACKET_ROW_COMPLEX(iter); \ + gemv_mult_complex_MMA(a##iter, b, &e0##iter); \ + } else { \ + __vector_pair a##iter; \ + GEMV_LOADPAIR_ROW_COMPLEX_MMA(iter, iter << 1) \ + gemv_mult_complex_MMA(a##iter, b, &e0##iter); \ + } \ + } + +#define GEMV_PREDUX4_COMPLEX_MMA(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + if (GEMV_IS_COMPLEX_FLOAT) { \ + cc##iter1 = predux_complex(&e0##iter2, &e0##iter3); \ + } else { \ + cc##iter1 = predux_complex(&e0##iter1); \ + } \ + } else { \ + EIGEN_UNUSED_VARIABLE(cc##iter1); \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_MMA(N) \ + GEMV_UNROLL_ROW(GEMV_INIT_ROW_COMPLEX_MMA, N) \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK(GEMV_WORK_ROW_COMPLEX_MMA, N) + +#define GEMV_PROCESS_ROW_COMPLEX_ONE_MMA(N) \ + for (; i < n##N; i += N) { \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_MMA(N) \ + GEMV_UNROLL_ROW_HALF(GEMV_PREDUX4_COMPLEX_MMA, (N >> 1)) \ + GEMV_PROCESS_END_ROW_COMPLEX(N); \ + } +#endif + +#define GEMV_WORK_ROW_COMPLEX(iter, N) \ + if (N > iter) { \ + PLhsPacket a##iter = GEMV_LOADPACKET_ROW_COMPLEX(iter); \ + gemv_mult_complex(a##iter, b, c0##iter, c1##iter); \ + } + +#define GEMV_PREDUX4_COMPLEX(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + cc##iter1 = predux_complex(c0##iter2, c0##iter3, c1##iter2, c1##iter3); \ + } else { \ + EIGEN_UNUSED_VARIABLE(cc##iter1); \ + } + +#define GEMV_MULT_COMPLEX(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + cc##iter1.scalar[0] += cj.pmul(lhs(i + iter2, j), b0); \ + cc##iter1.scalar[1] += cj.pmul(lhs(i + iter3, j), b0); \ + } + +#define GEMV_STORE_ROW_COMPLEX(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + storeMaddData(res + ((i + iter2) * resIncr), alpha, cc##iter1.scalar[0]); \ + storeMaddData(res + ((i + iter3) * resIncr), alpha, cc##iter1.scalar[1]); \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N) \ + GEMV_UNROLL_ROW(GEMV_INIT_COMPLEX, N) \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK(GEMV_WORK_ROW_COMPLEX, N) + +/** \internal main macro for gemv_complex_row - initialize accumulators, multiply and add inputs, predux and store results */ +#define GEMV_PROCESS_ROW_COMPLEX_ONE_NEW(N) \ + for (; i < n##N; i += N) { \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N) \ + GEMV_UNROLL_ROW_HALF(GEMV_PREDUX4_COMPLEX, (N >> 1)) \ + GEMV_PROCESS_END_ROW_COMPLEX(N); \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW(iter) \ + if (GEMV_IS_COMPLEX_COMPLEX) { \ + c0##iter = padd(c0##iter, c1##iter); \ + } \ + dd0 = predux(c0##iter); + +#if EIGEN_COMP_LLVM +#define GEMV_PROCESS_ROW_COMPLEX_SINGLE(N) \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N) + +#define GEMV_PROCESS_ROW_COMPLEX_ONE(N) \ + GEMV_PROCESS_ROW_COMPLEX_ONE_NEW(N) + +#define GEMV_PROCESS_ROW_COMPLEX_PREDUX(iter) \ + GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW(iter) +#else +// gcc seems to be reading and writing registers unnecessarily to memory. +// Use the old way for complex double until it is fixed. 
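Concretely, the `_OLD` path defined next keeps one ordinary `pmadd` accumulator per row and leaves all complex bookkeeping to `predux`; in scalar form it is just (illustrative sketch):

```cpp
#include <complex>
#include <cstddef>

std::complex<float> row_dot_old(const std::complex<float>* lhs,
                                const std::complex<float>* rhs, std::size_t cols) {
  std::complex<float> c1(0.0f, 0.0f);            // GEMV_INIT_COMPLEX_OLD: c1 = pset_zero()
  for (std::size_t j = 0; j < cols; ++j)
    c1 += lhs[j] * rhs[j];                       // GEMV_WORK_ROW_COMPLEX_OLD: pcj.pmadd
  return c1;                                     // GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD: predux(c1)
}
```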
+ +#define GEMV_LOADPACKET_ROW_COMPLEX_OLD(iter) \ + lhs.template load(i + (iter), j) + +#define GEMV_INIT_COMPLEX_OLD(iter, N) \ + EIGEN_UNUSED_VARIABLE(c0##iter); \ + if (N > iter) { \ + c1##iter = pset_zero(); \ + } else { \ + EIGEN_UNUSED_VARIABLE(c1##iter); \ + } + +#define GEMV_WORK_ROW_COMPLEX_OLD(iter, N) \ + if (N > iter) { \ + LhsPacket a##iter = GEMV_LOADPACKET_ROW_COMPLEX_OLD(iter); \ + c1##iter = pcj.pmadd(a##iter, b0, c1##iter); \ + } + +#define GEMV_PREDUX4_COMPLEX_OLD(iter1, iter2, iter3, N) \ + if (N > iter1) { \ + cc##iter1.scalar[0] = predux(c1##iter2); \ + cc##iter1.scalar[1] = predux(c1##iter3); \ + } else { \ + EIGEN_UNUSED_VARIABLE(cc##iter1); \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N) \ + GEMV_UNROLL_ROW(GEMV_INIT_COMPLEX_OLD, N) \ + j = 0; \ + for (; j + LhsPacketSize <= cols; j += LhsPacketSize) { \ + RhsPacket b0 = rhs2.template load(j); \ + GEMV_UNROLL_ROW(GEMV_WORK_ROW_COMPLEX_OLD, N) \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_ONE_OLD(N) \ + for (; i < n##N; i += N) { \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N) \ + GEMV_UNROLL_ROW_HALF(GEMV_PREDUX4_COMPLEX_OLD, (N >> 1)) \ + GEMV_PROCESS_END_ROW_COMPLEX(N) \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD(iter) \ + dd0 = predux(c1##iter); + +#if (__GNUC__ > 10) +#define GEMV_PROCESS_ROW_COMPLEX_IS_NEW 1 +#else +#define GEMV_PROCESS_ROW_COMPLEX_IS_NEW \ + (sizeof(Scalar) == sizeof(float)) || GEMV_IS_COMPLEX_COMPLEX +#endif + +#define GEMV_PROCESS_ROW_COMPLEX_SINGLE(N) \ + if (GEMV_PROCESS_ROW_COMPLEX_IS_NEW) { \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N) \ + } else { \ + GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N) \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_ONE(N) \ + if (GEMV_PROCESS_ROW_COMPLEX_IS_NEW) { \ + GEMV_PROCESS_ROW_COMPLEX_ONE_NEW(N) \ + } else { \ + GEMV_PROCESS_ROW_COMPLEX_ONE_OLD(N) \ + } + +#define GEMV_PROCESS_ROW_COMPLEX_PREDUX(iter) \ + if (GEMV_PROCESS_ROW_COMPLEX_IS_NEW) { \ + GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW(iter) \ + } else { \ + GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD(iter) \ + } +#endif + +#ifdef USE_GEMV_MMA +#define GEMV_PROCESS_ROW_COMPLEX(N) \ + GEMV_PROCESS_ROW_COMPLEX_ONE_MMA(N) +#else +#define GEMV_PROCESS_ROW_COMPLEX(N) \ + GEMV_PROCESS_ROW_COMPLEX_ONE(N) +#endif + +template +EIGEN_STRONG_INLINE void gemv_complex_row( + Index rows, Index cols, + const LhsMapper& alhs, + const RhsMapper& rhs, + ResScalar* res, Index resIncr, + ResScalar alpha) +{ + typedef gemv_traits Traits; + + typedef typename Traits::LhsPacket LhsPacket; + typedef typename Traits::RhsPacket RhsPacket; + typedef typename Traits::ResPacket ResPacket; + + typedef typename packet_traits::type ScalarPacket; + typedef typename packet_traits::type PLhsPacket; + typedef typename packet_traits::type PResPacket; + typedef gemv_traits PTraits; + + // The following copy tells the compiler that lhs's attributes are not modified outside this function + // This helps GCC to generate proper code. + LhsMapper lhs(alhs); + typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0); + + eigen_internal_assert(rhs.stride() == 1); + conj_helper cj; +#if !EIGEN_COMP_LLVM + conj_helper pcj; +#endif + + // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large, + // processing 8 rows at once might be counter productive wrt cache. +#ifndef GCC_ONE_VECTORPAIR_BUG + const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? 
(rows - 7) : (rows - 7); + const Index n4 = rows - 3; + const Index n2 = rows - 1; +#endif + + // TODO: for padded aligned inputs, we could enable aligned reads + enum { + LhsAlignment = Unaligned, + ResPacketSize = PTraits::ResPacketSize, + LhsPacketSize = PTraits::LhsPacketSize, + RhsPacketSize = PTraits::RhsPacketSize, + }; + + Index i = 0, j; + PResPacket c00, c01, c02, c03, c04, c05, c06, c07; + ResPacket c10, c11, c12, c13, c14, c15, c16, c17; +#ifdef USE_GEMV_MMA + __vector_quad e00, e01, e02, e03, e04, e05, e06, e07; + GEMV_UNUSED_ROW(8, e0) + GEMV_UNUSED_EXTRA(1, c0) + GEMV_UNUSED_EXTRA(1, c1) +#endif + ResScalar dd0; +#ifndef GCC_ONE_VECTORPAIR_BUG + ScalarBlock cc0, cc1, cc2, cc3; +#ifdef USE_GEMV_MMA + if (!GEMV_IS_COMPLEX_COMPLEX) +#endif + { + GEMV_PROCESS_ROW_COMPLEX(8) + } + GEMV_PROCESS_ROW_COMPLEX(4) + GEMV_PROCESS_ROW_COMPLEX(2) +#endif + for (; i < rows; ++i) + { + GEMV_PROCESS_ROW_COMPLEX_SINGLE(1) + GEMV_PROCESS_ROW_COMPLEX_PREDUX(0) + for (; j < cols; ++j) + { + dd0 += cj.pmul(lhs(i, j), rhs2(j)); + } + res[i * resIncr] += alpha * dd0; + } +} + +#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(Scalar, LhsScalar, RhsScalar) \ +template \ +struct general_matrix_vector_product \ +{ \ + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; \ +\ + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run( \ + Index rows, Index cols, \ + const LhsMapper& lhs, \ + const RhsMapper& rhs, \ + ResScalar* res, Index resIncr, \ + ResScalar alpha) { \ + gemv_complex_col(rows, cols, lhs, rhs, res, resIncr, alpha); \ + } \ +}; + +#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(Scalar, LhsScalar, RhsScalar) \ +template \ +struct general_matrix_vector_product \ +{ \ + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; \ +\ + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run( \ + Index rows, Index cols, \ + const LhsMapper& lhs, \ + const RhsMapper& rhs, \ + ResScalar* res, Index resIncr, \ + ResScalar alpha) { \ + gemv_complex_row(rows, cols, lhs, rhs, res, resIncr, alpha); \ + } \ +}; + +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(float, float, std::complex) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(float, std::complex, float) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(float, std::complex, std::complex) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(double, double, std::complex) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(double, std::complex, double) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(double, std::complex, std::complex) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(float, float, std::complex) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(float, std::complex, float) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(float, std::complex, std::complex) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(double, double, std::complex) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(double, std::complex, double) +EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(double, std::complex, std::complex) + +#endif // EIGEN_MATRIX_VECTOR_PRODUCT_ALTIVEC_H + diff --git a/libs/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h old mode 100755 new mode 100644 index 2a44054..b0f8529 --- a/libs/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PACKET_MATH_ALTIVEC_H #define EIGEN_PACKET_MATH_ALTIVEC_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -39,34 +41,34 @@ typedef eigen_packet_wrapper<__vector unsigned short int,0> Packet8bf; // We don't want to write the same code all 
the time, but we need to reuse the constants // and it doesn't really work to declare them global, so we define macros instead -#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ Packet4f p4f_##NAME = {X, X, X, X} -#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = vec_splat_s32(X) -#define _EIGEN_DECLARE_CONST_FAST_Packet4ui(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet4ui(NAME,X) \ Packet4ui p4ui_##NAME = {X, X, X, X} -#define _EIGEN_DECLARE_CONST_FAST_Packet8us(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet8us(NAME,X) \ Packet8us p8us_##NAME = {X, X, X, X, X, X, X, X} -#define _EIGEN_DECLARE_CONST_FAST_Packet16uc(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet16uc(NAME,X) \ Packet16uc p16uc_##NAME = {X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X} -#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ Packet4f p4f_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ Packet2d p2d_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ Packet2l p2l_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ const Packet4f p4f_##NAME = reinterpret_cast(pset1(X)) #define DST_CHAN 1 @@ -74,15 +76,17 @@ typedef eigen_packet_wrapper<__vector unsigned short int,0> Packet8bf; #define __UNPACK_TYPE__(PACKETNAME) typename unpacket_traits::type // These constants are endian-agnostic -static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} -static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16} -static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} -static _EIGEN_DECLARE_CONST_FAST_Packet4ui(SIGN, 0x80000000u); -static _EIGEN_DECLARE_CONST_FAST_Packet4ui(PREV0DOT5, 0x3EFFFFFFu); -static _EIGEN_DECLARE_CONST_FAST_Packet8us(ONE,1); //{ 1, 1, 1, 1, 1, 1, 1, 1} -static _EIGEN_DECLARE_CONST_FAST_Packet16uc(ONE,1); +static EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} +static EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} +static EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} +static EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16} +static EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} +static EIGEN_DECLARE_CONST_FAST_Packet4ui(SIGN, 0x80000000u); +static EIGEN_DECLARE_CONST_FAST_Packet4ui(PREV0DOT5, 0x3EFFFFFFu); +#ifndef __POWER8_VECTOR__ +static EIGEN_DECLARE_CONST_FAST_Packet8us(ONE,1); //{ 1, 1, 1, 1, 1, 1, 1, 1} +static EIGEN_DECLARE_CONST_FAST_Packet16uc(ONE,1); +#endif static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} #ifndef __VSX__ static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} @@ -100,11 +104,13 @@ static Packet16uc p16uc_COUNTDOWN = { 0, 1, 2, 3, 4, 5, 6, 7, static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }; static Packet16uc p16uc_REVERSE16 = { 14,15, 12,13, 
10,11, 8,9, 6,7, 4,5, 2,3, 0,1 }; +#ifndef _ARCH_PWR9 static Packet16uc p16uc_REVERSE8 = { 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 }; +#endif +#ifdef _BIG_ENDIAN static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; -static Packet16uc p16uc_DUPLICATE16_HI = { 0,1,0,1, 2,3,2,3, 4,5,4,5, 6,7,6,7 }; -static Packet16uc p16uc_DUPLICATE8_HI = { 0,0, 1,1, 2,2, 3,3, 4,4, 5,5, 6,6, 7,7 }; +#endif static const Packet16uc p16uc_DUPLICATE16_EVEN= { 0,1 ,0,1, 4,5, 4,5, 8,9, 8,9, 12,13, 12,13 }; static const Packet16uc p16uc_DUPLICATE16_ODD = { 2,3 ,2,3, 6,7, 6,7, 10,11, 10,11, 14,15, 14,15 }; @@ -114,15 +120,11 @@ static Packet16uc p16uc_QUADRUPLICATE16_HI = { 0,1,0,1,0,1,0,1, 2,3,2,3,2,3,2,3 // Define global static constants: #ifdef _BIG_ENDIAN static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); -#ifdef __VSX__ -static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; -#endif static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; #else static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; -static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; @@ -135,18 +137,18 @@ static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16; static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; -#ifdef _BIG_ENDIAN -static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; -#else -static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; -#endif // _BIG_ENDIAN - #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR); #else #define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); #endif +#if EIGEN_COMP_LLVM +#define LOAD_STORE_UNROLL_16 _Pragma("unroll 16") +#else +#define LOAD_STORE_UNROLL_16 _Pragma("GCC unroll(16)") +#endif + template <> struct packet_traits : default_packet_traits { typedef Packet4f type; @@ -166,6 +168,9 @@ struct packet_traits : default_packet_traits { HasAbs = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasACos = 1, + HasASin = 1, + HasATan = 1, HasLog = 1, HasExp = 1, #ifdef __VSX__ @@ -175,16 +180,19 @@ struct packet_traits : default_packet_traits { #else HasRsqrt = 0, #endif + HasTanh = EIGEN_FAST_MATH, + HasErf = 
EIGEN_FAST_MATH, + HasRint = 1, #else HasSqrt = 0, HasRsqrt = 0, - HasTanh = EIGEN_FAST_MATH, - HasErf = EIGEN_FAST_MATH, + HasTanh = 0, + HasErf = 0, + HasRint = 0, #endif HasRound = 1, HasFloor = 1, HasCeil = 1, - HasRint = 1, HasNegate = 1, HasBlend = 1 }; @@ -217,16 +225,17 @@ struct packet_traits : default_packet_traits { #else HasRsqrt = 0, #endif + HasRint = 1, #else HasSqrt = 0, HasRsqrt = 0, - HasTanh = EIGEN_FAST_MATH, - HasErf = EIGEN_FAST_MATH, + HasRint = 0, #endif + HasTanh = 0, + HasErf = 0, HasRound = 1, HasFloor = 1, HasCeil = 1, - HasRint = 1, HasNegate = 1, HasBlend = 1 }; @@ -247,7 +256,8 @@ struct packet_traits : default_packet_traits { HasShift = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1 + HasBlend = 1, + HasCmp = 1 }; }; @@ -265,7 +275,8 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1 + HasBlend = 1, + HasCmp = 1 }; }; @@ -283,7 +294,8 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1 + HasBlend = 1, + HasCmp = 1 }; }; @@ -301,7 +313,8 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1 + HasBlend = 1, + HasCmp = 1 }; }; @@ -319,7 +332,8 @@ struct packet_traits : default_packet_traits { HasSub = 1, HasMul = 1, HasDiv = 0, - HasBlend = 1 + HasBlend = 1, + HasCmp = 1 }; }; @@ -475,6 +489,119 @@ template<> EIGEN_STRONG_INLINE Packet8bf pload(const bfloat16* fr return pload_common(reinterpret_cast(from)); } +template +EIGEN_ALWAYS_INLINE Packet pload_ignore(const __UNPACK_TYPE__(Packet)* from) +{ + // some versions of GCC throw "unused-but-set-parameter". + // ignoring these warnings for now. + EIGEN_UNUSED_VARIABLE(from); + EIGEN_DEBUG_ALIGNED_LOAD + // Ignore partial input memory initialized +#if !EIGEN_COMP_LLVM + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#ifdef __VSX__ + return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from)); +#else + return vec_ld(0, from); +#endif +#if !EIGEN_COMP_LLVM + #pragma GCC diagnostic pop +#endif +} + +template<> EIGEN_ALWAYS_INLINE Packet8bf pload_ignore(const bfloat16* from) +{ + return pload_ignore(reinterpret_cast(from)); +} + +template +EIGEN_ALWAYS_INLINE Packet pload_partial_common(const __UNPACK_TYPE__(Packet)* from, const Index n, const Index offset) +{ + // some versions of GCC throw "unused-but-set-parameter". + // ignoring these warnings for now. 
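// NOTE (annotation, not part of the patch): the body below implements a
// length-limited load. On Power9 (_ARCH_PWR9) a single
// vec_xl_len(from, n * size) reads exactly n elements, which are then
// shifted into lane position `offset` when requested. On older targets the
// bytes are copied piecewise (16/8/4/2/1 at a time) into an aligned scratch
// buffer and re-loaded with pload_ignore, so lanes past the requested range
// stay uninitialized but nothing outside the n valid elements is read.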
+ const Index packet_size = unpacket_traits::size; + eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet"); + const Index size = sizeof(__UNPACK_TYPE__(Packet)); +#ifdef _ARCH_PWR9 + EIGEN_UNUSED_VARIABLE(packet_size); + EIGEN_DEBUG_ALIGNED_LOAD + EIGEN_UNUSED_VARIABLE(from); + Packet load = vec_xl_len(const_cast<__UNPACK_TYPE__(Packet)*>(from), n * size); + if (offset) { + Packet16uc shift = pset1(offset * 8 * size); +#ifdef _BIG_ENDIAN + load = Packet(vec_sro(Packet16uc(load), shift)); +#else + load = Packet(vec_slo(Packet16uc(load), shift)); +#endif + } + return load; +#else + EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) load[packet_size]; + unsigned char* load2 = reinterpret_cast(load + offset); + unsigned char* from2 = reinterpret_cast(const_cast<__UNPACK_TYPE__(Packet)*>(from)); + Index n2 = n * size; + Index i = 0; + if (16 <= n2) { + pstoreu(load2, ploadu(from2)); + i += 16; + } + if (i + 8 <= n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + i += 8; + } + if (i + 4 <= n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + i += 4; + } + if (i + 2 <= n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + i += 2; + } + if (i < n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + } + return pload_ignore(load); +#endif +} + +template<> EIGEN_ALWAYS_INLINE Packet4f pload_partial(const float* from, const Index n, const Index offset) +{ + return pload_partial_common(from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE Packet4i pload_partial(const int* from, const Index n, const Index offset) +{ + return pload_partial_common(from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE Packet8s pload_partial(const short int* from, const Index n, const Index offset) +{ + return pload_partial_common(from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE Packet8us pload_partial(const unsigned short int* from, const Index n, const Index offset) +{ + return pload_partial_common(from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE Packet8bf pload_partial(const bfloat16* from, const Index n, const Index offset) +{ + return pload_partial_common(reinterpret_cast(from), n, offset); +} + +template<> EIGEN_ALWAYS_INLINE Packet16c pload_partial(const signed char* from, const Index n, const Index offset) +{ + return pload_partial_common(from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE Packet16uc pload_partial(const unsigned char* from, const Index n, const Index offset) +{ + return pload_partial_common(from, n, offset); +} + template EIGEN_STRONG_INLINE void pstore_common(__UNPACK_TYPE__(Packet)* to, const Packet& from){ // some versions of GCC throw "unused-but-set-parameter" (float *to). @@ -523,6 +650,91 @@ template<> EIGEN_STRONG_INLINE void pstore(unsigned char* t pstore_common(to, from); } +template EIGEN_ALWAYS_INLINE void pstore_partial_common(__UNPACK_TYPE__(Packet)* to, const Packet& from, const Index n, const Index offset) +{ + // some versions of GCC throw "unused-but-set-parameter" (float *to). + // ignoring these warnings for now. 
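// NOTE (annotation, not part of the patch): mirror of pload_partial_common.
// On Power9 the packet is first shifted by `offset` (vec_slo/vec_sro, the
// direction flipping with endianness) and vec_xst_len then writes exactly
// n * size bytes; otherwise the packet is spilled with pstore to an aligned
// scratch buffer and copied out piecewise, again avoiding any out-of-bounds
// write.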
+ const Index packet_size = unpacket_traits::size; + eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet"); + const Index size = sizeof(__UNPACK_TYPE__(Packet)); +#ifdef _ARCH_PWR9 + EIGEN_UNUSED_VARIABLE(packet_size); + EIGEN_UNUSED_VARIABLE(to); + EIGEN_DEBUG_ALIGNED_STORE + Packet store = from; + if (offset) { + Packet16uc shift = pset1(offset * 8 * size); +#ifdef _BIG_ENDIAN + store = Packet(vec_slo(Packet16uc(store), shift)); +#else + store = Packet(vec_sro(Packet16uc(store), shift)); +#endif + } + vec_xst_len(store, to, n * size); +#else + EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) store[packet_size]; + pstore(store, from); + unsigned char* store2 = reinterpret_cast(store + offset); + unsigned char* to2 = reinterpret_cast(to); + Index n2 = n * size; + Index i = 0; + if (16 <= n2) { + pstore(to2, ploadu(store2)); + i += 16; + } + if (i + 8 <= n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + i += 8; + } + if (i + 4 <= n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + i += 4; + } + if (i + 2 <= n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + i += 2; + } + if (i < n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + } +#endif +} + +template<> EIGEN_ALWAYS_INLINE void pstore_partial(float* to, const Packet4f& from, const Index n, const Index offset) +{ + pstore_partial_common(to, from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE void pstore_partial(int* to, const Packet4i& from, const Index n, const Index offset) +{ + pstore_partial_common(to, from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE void pstore_partial(short int* to, const Packet8s& from, const Index n, const Index offset) +{ + pstore_partial_common(to, from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE void pstore_partial(unsigned short int* to, const Packet8us& from, const Index n, const Index offset) +{ + pstore_partial_common(to, from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE void pstore_partial(bfloat16* to, const Packet8bf& from, const Index n, const Index offset) +{ + pstore_partial_common(reinterpret_cast(to), from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE void pstore_partial(signed char* to, const Packet16c& from, const Index n, const Index offset) +{ + pstore_partial_common(to, from, n, offset); +} + +template<> EIGEN_ALWAYS_INLINE void pstore_partial(unsigned char* to, const Packet16uc& from, const Index n, const Index offset) +{ + pstore_partial_common(to, from, n, offset); +} + template EIGEN_STRONG_INLINE Packet pset1_size4(const __UNPACK_TYPE__(Packet)& from) { @@ -600,168 +812,167 @@ pbroadcast4(const int *a, pbroadcast4_common(a, a0, a1, a2, a3); } -template EIGEN_DEVICE_FUNC inline Packet pgather_common(const __UNPACK_TYPE__(Packet)* from, Index stride) +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pgather_common(const __UNPACK_TYPE__(Packet)* from, Index stride, const Index n = unpacket_traits::size) { - EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4]; - a[0] = from[0*stride]; - a[1] = from[1*stride]; - a[2] = from[2*stride]; - a[3] = from[3*stride]; - return pload(a); + EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[unpacket_traits::size]; + eigen_assert(n <= unpacket_traits::size && "number of elements will gather past end of packet"); + LOAD_STORE_UNROLL_16 + for (Index i = 0; i < n; i++) { + a[i] = from[i*stride]; + } + // Leave rest of the array uninitialized + return pload_ignore(a); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* 
from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4f pgather(const float* from, Index stride) { return pgather_common(from, stride); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4i pgather(const int* from, Index stride) { return pgather_common(from, stride); } -template EIGEN_DEVICE_FUNC inline Packet pgather_size8(const __UNPACK_TYPE__(Packet)* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet8s pgather(const short int* from, Index stride) { - EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[8]; - a[0] = from[0*stride]; - a[1] = from[1*stride]; - a[2] = from[2*stride]; - a[3] = from[3*stride]; - a[4] = from[4*stride]; - a[5] = from[5*stride]; - a[6] = from[6*stride]; - a[7] = from[7*stride]; - return pload(a); + return pgather_common(from, stride); } -template<> EIGEN_DEVICE_FUNC inline Packet8s pgather(const short int* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet8us pgather(const unsigned short int* from, Index stride) { - return pgather_size8(from, stride); + return pgather_common(from, stride); } -template<> EIGEN_DEVICE_FUNC inline Packet8us pgather(const unsigned short int* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet8bf pgather(const bfloat16* from, Index stride) { - return pgather_size8(from, stride); + return pgather_common(from, stride); } -template<> EIGEN_DEVICE_FUNC inline Packet8bf pgather(const bfloat16* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet16c pgather(const signed char* from, Index stride) { - return pgather_size8(from, stride); + return pgather_common(from, stride); } -template EIGEN_DEVICE_FUNC inline Packet pgather_size16(const __UNPACK_TYPE__(Packet)* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet16uc pgather(const unsigned char* from, Index stride) { - EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[16]; - a[0] = from[0*stride]; - a[1] = from[1*stride]; - a[2] = from[2*stride]; - a[3] = from[3*stride]; - a[4] = from[4*stride]; - a[5] = from[5*stride]; - a[6] = from[6*stride]; - a[7] = from[7*stride]; - a[8] = from[8*stride]; - a[9] = from[9*stride]; - a[10] = from[10*stride]; - a[11] = from[11*stride]; - a[12] = from[12*stride]; - a[13] = from[13*stride]; - a[14] = from[14*stride]; - a[15] = from[15*stride]; - return pload(a); + return pgather_common(from, stride); } - -template<> EIGEN_DEVICE_FUNC inline Packet16c pgather(const signed char* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4f pgather_partial(const float* from, Index stride, const Index n) { - return pgather_size16(from, stride); + return pgather_common(from, stride, n); } -template<> EIGEN_DEVICE_FUNC inline Packet16uc pgather(const unsigned char* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4i pgather_partial(const int* from, Index stride, const Index n) { - return pgather_size16(from, stride); + return pgather_common(from, stride, n); } -template EIGEN_DEVICE_FUNC inline void pscatter_size4(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet8s pgather_partial(const short int* from, Index stride, const Index n) { - EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4]; + return pgather_common(from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet8us pgather_partial(const unsigned short int* from, Index 
stride, const Index n) +{ + return pgather_common(from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet8bf pgather_partial(const bfloat16* from, Index stride, const Index n) +{ + return pgather_common(from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet16c pgather_partial(const signed char* from, Index stride, const Index n) +{ + return pgather_common(from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet16uc pgather_partial(const unsigned char* from, Index stride, const Index n) +{ + return pgather_common(from, stride, n); +} + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_common(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride, const Index n = unpacket_traits::size) +{ + EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[unpacket_traits::size]; + eigen_assert(n <= unpacket_traits::size && "number of elements will scatter past end of packet"); pstore<__UNPACK_TYPE__(Packet)>(a, from); - to[0*stride] = a[0]; - to[1*stride] = a[1]; - to[2*stride] = a[2]; - to[3*stride] = a[3]; + LOAD_STORE_UNROLL_16 + for (Index i = 0; i < n; i++) { + to[i*stride] = a[i]; + } } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(float* to, const Packet4f& from, Index stride) { - pscatter_size4(to, from, stride); + pscatter_common(to, from, stride); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(int* to, const Packet4i& from, Index stride) { - pscatter_size4(to, from, stride); + pscatter_common(to, from, stride); } -template EIGEN_DEVICE_FUNC inline void pscatter_size8(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(short int* to, const Packet8s& from, Index stride) { - EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[8]; - pstore<__UNPACK_TYPE__(Packet)>(a, from); - to[0*stride] = a[0]; - to[1*stride] = a[1]; - to[2*stride] = a[2]; - to[3*stride] = a[3]; - to[4*stride] = a[4]; - to[5*stride] = a[5]; - to[6*stride] = a[6]; - to[7*stride] = a[7]; + pscatter_common(to, from, stride); } - -template<> EIGEN_DEVICE_FUNC inline void pscatter(short int* to, const Packet8s& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(unsigned short int* to, const Packet8us& from, Index stride) { - pscatter_size8(to, from, stride); + pscatter_common(to, from, stride); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(unsigned short int* to, const Packet8us& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(bfloat16* to, const Packet8bf& from, Index stride) { - pscatter_size8(to, from, stride); + pscatter_common(to, from, stride); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(bfloat16* to, const Packet8bf& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(signed char* to, const Packet16c& from, Index stride) { - pscatter_size8(to, from, stride); + pscatter_common(to, from, stride); } -template EIGEN_DEVICE_FUNC inline void pscatter_size16(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(unsigned char* to, const Packet16uc& from, Index stride) { - EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[16]; - pstore<__UNPACK_TYPE__(Packet)>(a, from); - to[0*stride] = a[0]; - 
to[1*stride] = a[1]; - to[2*stride] = a[2]; - to[3*stride] = a[3]; - to[4*stride] = a[4]; - to[5*stride] = a[5]; - to[6*stride] = a[6]; - to[7*stride] = a[7]; - to[8*stride] = a[8]; - to[9*stride] = a[9]; - to[10*stride] = a[10]; - to[11*stride] = a[11]; - to[12*stride] = a[12]; - to[13*stride] = a[13]; - to[14*stride] = a[14]; - to[15*stride] = a[15]; + pscatter_common(to, from, stride); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(signed char* to, const Packet16c& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(float* to, const Packet4f& from, Index stride, const Index n) { - pscatter_size16(to, from, stride); + pscatter_common(to, from, stride, n); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(unsigned char* to, const Packet16uc& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(int* to, const Packet4i& from, Index stride, const Index n) { - pscatter_size16(to, from, stride); + pscatter_common(to, from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(short int* to, const Packet8s& from, Index stride, const Index n) +{ + pscatter_common(to, from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(unsigned short int* to, const Packet8us& from, Index stride, const Index n) +{ + pscatter_common(to, from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(bfloat16* to, const Packet8bf& from, Index stride, const Index n) +{ + pscatter_common(to, from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(signed char* to, const Packet16c& from, Index stride, const Index n) +{ + pscatter_common(to, from, stride, n); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(unsigned char* to, const Packet16uc& from, Index stride, const Index n) +{ + pscatter_common(to, from, stride, n); } template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return pset1(a) + p4f_COUNTDOWN; } @@ -786,8 +997,22 @@ template<> EIGEN_STRONG_INLINE Packet8us psub (const Packet8us& a, template<> EIGEN_STRONG_INLINE Packet16c psub (const Packet16c& a, const Packet16c& b) { return a - b; } template<> EIGEN_STRONG_INLINE Packet16uc psub(const Packet16uc& a, const Packet16uc& b) { return a - b; } -template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; } -template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; } +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) +{ +#ifdef __POWER8_VECTOR__ + return vec_neg(a); +#else + return p4f_ZERO - a; +#endif +} +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) +{ +#ifdef __POWER8_VECTOR__ + return vec_neg(a); +#else + return p4i_ZERO - a; +#endif +} template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } @@ -829,6 +1054,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& template<> EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet8us pmadd(const Packet8us& a, const Packet8us& b, const Packet8us& c) { return vec_madd(a,b,c); } +#ifdef __VSX__ +template<> EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return 
vec_msub(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmsub(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmadd(a,b,c); } +#endif + template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { #ifdef __VSX__ @@ -872,19 +1103,29 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const return vec_nor(c,c); } +#ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return reinterpret_cast(vec_cmple(a,b)); } +#endif template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return reinterpret_cast(vec_cmplt(a,b)); } template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast(vec_cmpeq(a,b)); } +#ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet8s pcmp_le(const Packet8s& a, const Packet8s& b) { return reinterpret_cast(vec_cmple(a,b)); } +#endif template<> EIGEN_STRONG_INLINE Packet8s pcmp_lt(const Packet8s& a, const Packet8s& b) { return reinterpret_cast(vec_cmplt(a,b)); } template<> EIGEN_STRONG_INLINE Packet8s pcmp_eq(const Packet8s& a, const Packet8s& b) { return reinterpret_cast(vec_cmpeq(a,b)); } +#ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet8us pcmp_le(const Packet8us& a, const Packet8us& b) { return reinterpret_cast(vec_cmple(a,b)); } +#endif template<> EIGEN_STRONG_INLINE Packet8us pcmp_lt(const Packet8us& a, const Packet8us& b) { return reinterpret_cast(vec_cmplt(a,b)); } template<> EIGEN_STRONG_INLINE Packet8us pcmp_eq(const Packet8us& a, const Packet8us& b) { return reinterpret_cast(vec_cmpeq(a,b)); } +#ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet16c pcmp_le(const Packet16c& a, const Packet16c& b) { return reinterpret_cast(vec_cmple(a,b)); } +#endif template<> EIGEN_STRONG_INLINE Packet16c pcmp_lt(const Packet16c& a, const Packet16c& b) { return reinterpret_cast(vec_cmplt(a,b)); } template<> EIGEN_STRONG_INLINE Packet16c pcmp_eq(const Packet16c& a, const Packet16c& b) { return reinterpret_cast(vec_cmpeq(a,b)); } +#ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet16uc pcmp_le(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast(vec_cmple(a,b)); } +#endif template<> EIGEN_STRONG_INLINE Packet16uc pcmp_lt(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast(vec_cmplt(a,b)); } template<> EIGEN_STRONG_INLINE Packet16uc pcmp_eq(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast(vec_cmpeq(a,b)); } @@ -937,6 +1178,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) } template<> EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { return vec_ceil(a); } template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { return vec_floor(a); } +#ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { Packet4f res; @@ -947,11 +1189,15 @@ template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) return res; } +#endif template EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from) { EIGEN_DEBUG_ALIGNED_LOAD -#ifdef _BIG_ENDIAN +#if defined(__VSX__) || !defined(_BIG_ENDIAN) + EIGEN_DEBUG_UNALIGNED_LOAD + return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from)); +#else Packet16uc MSQ, LSQ; Packet16uc mask; MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword @@ -959,9 +1205,6 @@ template 
EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPAC mask = vec_lvsl(0, from); // create the permute mask //TODO: Add static_cast here return (Packet) vec_perm(MSQ, LSQ, mask); // align the data -#else - EIGEN_DEBUG_UNALIGNED_LOAD - return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from)); #endif } @@ -994,12 +1237,80 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploadu(const unsigned char return ploadu_common(from); } +template EIGEN_ALWAYS_INLINE Packet ploadu_partial_common(const __UNPACK_TYPE__(Packet)* from, const Index n) +{ + const Index packet_size = unpacket_traits::size; + eigen_assert(n <= packet_size && "number of elements will read past end of packet"); + const Index size = sizeof(__UNPACK_TYPE__(Packet)); +#ifdef _ARCH_PWR9 + EIGEN_UNUSED_VARIABLE(packet_size); + EIGEN_DEBUG_ALIGNED_LOAD + EIGEN_DEBUG_UNALIGNED_LOAD + return vec_xl_len(const_cast<__UNPACK_TYPE__(Packet)*>(from), n * size); +#else + EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) load[packet_size]; + unsigned char* load2 = reinterpret_cast(load); + unsigned char* from2 = reinterpret_cast(const_cast<__UNPACK_TYPE__(Packet)*>(from)); + Index n2 = n * size; + Index i = 0; + if (16 <= n2) { + pstore(load2, ploadu(from2)); + i += 16; + } + if (i + 8 <= n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + i += 8; + } + if (i + 4 <= n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + i += 4; + } + if (i + 2 <= n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + i += 2; + } + if (i < n2) { + *reinterpret_cast(load2 + i) = *reinterpret_cast(from2 + i); + } + return pload_ignore(load); +#endif +} + +template<> EIGEN_ALWAYS_INLINE Packet4f ploadu_partial(const float* from, const Index n) +{ + return ploadu_partial_common(from, n); +} +template<> EIGEN_ALWAYS_INLINE Packet4i ploadu_partial(const int* from, const Index n) +{ + return ploadu_partial_common(from, n); +} +template<> EIGEN_ALWAYS_INLINE Packet8s ploadu_partial(const short int* from, const Index n) +{ + return ploadu_partial_common(from, n); +} +template<> EIGEN_ALWAYS_INLINE Packet8us ploadu_partial(const unsigned short int* from, const Index n) +{ + return ploadu_partial_common(from, n); +} +template<> EIGEN_ALWAYS_INLINE Packet8bf ploadu_partial(const bfloat16* from, const Index n) +{ + return ploadu_partial_common(reinterpret_cast(from), n); +} +template<> EIGEN_ALWAYS_INLINE Packet16c ploadu_partial(const signed char* from, const Index n) +{ + return ploadu_partial_common(from, n); +} +template<> EIGEN_ALWAYS_INLINE Packet16uc ploadu_partial(const unsigned char* from, const Index n) +{ + return ploadu_partial_common(from, n); +} + template EIGEN_STRONG_INLINE Packet ploaddup_common(const __UNPACK_TYPE__(Packet)* from) { Packet p; if((std::ptrdiff_t(from) % 16) == 0) p = pload(from); else p = ploadu(from); - return vec_perm(p, p, p16uc_DUPLICATE32_HI); + return vec_mergeh(p, p); } template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { @@ -1015,7 +1326,7 @@ template<> EIGEN_STRONG_INLINE Packet8s ploaddup(const short int* Packet8s p; if((std::ptrdiff_t(from) % 16) == 0) p = pload(from); else p = ploadu(from); - return vec_perm(p, p, p16uc_DUPLICATE16_HI); + return vec_mergeh(p, p); } template<> EIGEN_STRONG_INLINE Packet8us ploaddup(const unsigned short int* from) @@ -1023,7 +1334,7 @@ template<> EIGEN_STRONG_INLINE Packet8us ploaddup(const unsigned shor Packet8us p; if((std::ptrdiff_t(from) % 16) == 0) p = pload(from); else p = ploadu(from); - return vec_perm(p, p, p16uc_DUPLICATE16_HI); + 
return vec_mergeh(p, p); } template<> EIGEN_STRONG_INLINE Packet8s ploadquad(const short int* from) @@ -1052,7 +1363,7 @@ template<> EIGEN_STRONG_INLINE Packet16c ploaddup(const signed char* Packet16c p; if((std::ptrdiff_t(from) % 16) == 0) p = pload(from); else p = ploadu(from); - return vec_perm(p, p, p16uc_DUPLICATE8_HI); + return vec_mergeh(p, p); } template<> EIGEN_STRONG_INLINE Packet16uc ploaddup(const unsigned char* from) @@ -1060,13 +1371,15 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploaddup(const unsigned ch Packet16uc p; if((std::ptrdiff_t(from) % 16) == 0) p = pload(from); else p = ploadu(from); - return vec_perm(p, p, p16uc_DUPLICATE8_HI); + return vec_mergeh(p, p); } template EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)* to, const Packet& from) { EIGEN_DEBUG_UNALIGNED_STORE -#ifdef _BIG_ENDIAN +#if defined(__VSX__) || !defined(_BIG_ENDIAN) + vec_xst(from, 0, to); +#else // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html // Warning: not thread safe! Packet16uc MSQ, LSQ, edges; @@ -1081,8 +1394,6 @@ template EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE_ LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ) vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part second -#else - vec_xst(from, 0, to); #endif } template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) @@ -1114,6 +1425,73 @@ template<> EIGEN_STRONG_INLINE void pstoreu(unsigned char* t pstoreu_common(to, from); } +template EIGEN_ALWAYS_INLINE void pstoreu_partial_common(__UNPACK_TYPE__(Packet)* to, const Packet& from, const Index n) +{ + const Index packet_size = unpacket_traits::size; + eigen_assert(n <= packet_size && "number of elements will write past end of packet"); + const Index size = sizeof(__UNPACK_TYPE__(Packet)); +#ifdef _ARCH_PWR9 + EIGEN_UNUSED_VARIABLE(packet_size); + EIGEN_DEBUG_UNALIGNED_STORE + vec_xst_len(from, to, n * size); +#else + EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) store[packet_size]; + pstore(store, from); + unsigned char* store2 = reinterpret_cast(store); + unsigned char* to2 = reinterpret_cast(to); + Index n2 = n * size; + Index i = 0; + if (16 <= n2) { + pstoreu(to2, pload(store2)); + i += 16; + } + if (i + 8 <= n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + i += 8; + } + if (i + 4 <= n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + i += 4; + } + if (i + 2 <= n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + i += 2; + } + if (i < n2) { + *reinterpret_cast(to2 + i) = *reinterpret_cast(store2 + i); + } +#endif +} + +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(float* to, const Packet4f& from, const Index n) +{ + pstoreu_partial_common(to, from, n); +} +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(int* to, const Packet4i& from, const Index n) +{ + pstoreu_partial_common(to, from, n); +} +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(short int* to, const Packet8s& from, const Index n) +{ + pstoreu_partial_common(to, from, n); +} +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(unsigned short int* to, const Packet8us& from, const Index n) +{ + pstoreu_partial_common(to, from, n); +} +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(bfloat16* to, const Packet8bf& from, const Index n) +{ + pstoreu_partial_common(reinterpret_cast(to), from, n); +} +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(signed char* to, const Packet16c& from, 
const Index n) +{ + pstoreu_partial_common(to, from, n); +} +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(unsigned char* to, const Packet16uc& from, const Index n) +{ + pstoreu_partial_common(to, from, n); +} + template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { EIGEN_PPC_PREFETCH(addr); } @@ -1162,11 +1540,19 @@ template<> EIGEN_STRONG_INLINE Packet8us preverse(const Packet8us& a) } template<> EIGEN_STRONG_INLINE Packet16c preverse(const Packet16c& a) { +#ifdef _ARCH_PWR9 + return vec_revb(a); +#else return vec_perm(a, a, p16uc_REVERSE8); +#endif } template<> EIGEN_STRONG_INLINE Packet16uc preverse(const Packet16uc& a) { +#ifdef _ARCH_PWR9 + return vec_revb(a); +#else return vec_perm(a, a, p16uc_REVERSE8); +#endif } template<> EIGEN_STRONG_INLINE Packet8bf preverse(const Packet8bf& a) { @@ -1180,10 +1566,13 @@ template<> EIGEN_STRONG_INLINE Packet8us pabs(const Packet8us& a) { return a; } template<> EIGEN_STRONG_INLINE Packet16c pabs(const Packet16c& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet16uc pabs(const Packet16uc& a) { return a; } template<> EIGEN_STRONG_INLINE Packet8bf pabs(const Packet8bf& a) { - _EIGEN_DECLARE_CONST_FAST_Packet8us(abs_mask,0x7FFF); + EIGEN_DECLARE_CONST_FAST_Packet8us(abs_mask,0x7FFF); return pand(p8us_abs_mask, a); } +template<> EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) { return vec_sra(a.m_val, vec_splat_u16(15)); } +template<> EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { return (Packet4f)vec_sra((Packet4i)a, vec_splats((unsigned int)(31))); } + template EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) { return vec_sra(a,reinterpret_cast(pset1(N))); } template EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a) @@ -1192,38 +1581,38 @@ template EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& { return vec_sl(a,reinterpret_cast(pset1(N))); } template EIGEN_STRONG_INLINE Packet4f plogical_shift_left(const Packet4f& a) { - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); Packet4ui r = vec_sl(reinterpret_cast(a), p4ui_mask); return reinterpret_cast(r); } template EIGEN_STRONG_INLINE Packet4f plogical_shift_right(const Packet4f& a) { - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); Packet4ui r = vec_sr(reinterpret_cast(a), p4ui_mask); return reinterpret_cast(r); } template EIGEN_STRONG_INLINE Packet4ui plogical_shift_right(const Packet4ui& a) { - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); return vec_sr(a, p4ui_mask); } template EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(const Packet4ui& a) { - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); return vec_sl(a, p4ui_mask); } template EIGEN_STRONG_INLINE Packet8us plogical_shift_left(const Packet8us& a) { - const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N); + const EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N); return vec_sl(a, p8us_mask); } template EIGEN_STRONG_INLINE Packet8us plogical_shift_right(const Packet8us& a) { - const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N); + const EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N); return vec_sr(a, p8us_mask); } @@ -1232,7 +1621,7 @@ EIGEN_STRONG_INLINE Packet4f Bf16ToF32Even(const Packet8bf& bf){ } EIGEN_STRONG_INLINE 
Packet4f Bf16ToF32Odd(const Packet8bf& bf){ - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000); return pand( reinterpret_cast(bf.m_val), reinterpret_cast(p4ui_high_mask) @@ -1242,7 +1631,7 @@ EIGEN_STRONG_INLINE Packet4f Bf16ToF32Odd(const Packet8bf& bf){ // Simple interleaving of bool masks, prevents true values from being // converted to NaNs. EIGEN_STRONG_INLINE Packet8bf F32ToBf16Bool(Packet4f even, Packet4f odd) { - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000); Packet4f bf_odd, bf_even; bf_odd = pand(reinterpret_cast(p4ui_high_mask), odd); bf_even = plogical_shift_right<16>(even); @@ -1254,18 +1643,18 @@ EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f){ Packet4ui lsb = plogical_shift_right<16>(input); lsb = pand(lsb, reinterpret_cast(p4i_ONE)); - _EIGEN_DECLARE_CONST_FAST_Packet4ui(BIAS,0x7FFFu); + EIGEN_DECLARE_CONST_FAST_Packet4ui(BIAS,0x7FFFu); Packet4ui rounding_bias = padd(lsb, p4ui_BIAS); input = padd(input, rounding_bias); //Test NaN and Subnormal - Begin - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(exp_mask, 0x7F800000); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(exp_mask, 0x7F800000); Packet4ui exp = pand(p4ui_exp_mask, reinterpret_cast(p4f)); - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mantissa_mask, 0x7FFFFF); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(mantissa_mask, 0x7FFFFF); Packet4ui mantissa = pand(p4ui_mantissa_mask, reinterpret_cast(p4f)); - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(max_exp, 0x7F800000); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(max_exp, 0x7F800000); Packet4bi is_max_exp = vec_cmpeq(exp, p4ui_max_exp); Packet4bi is_zero_exp = vec_cmpeq(exp, reinterpret_cast(p4i_ZERO)); @@ -1280,7 +1669,7 @@ EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f){ reinterpret_cast(is_mant_zero) ); - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000); input = vec_sel(input, p4ui_nan, nan_selector); input = vec_sel(input, reinterpret_cast(p4f), subnormal_selector); //Test NaN and Subnormal - End @@ -1341,12 +1730,6 @@ template<> EIGEN_STRONG_INLINE Packet8bf psub(const Packet8bf& a, con BF16_TO_F32_BINARY_OP_WRAPPER(psub, a, b); } -template<> EIGEN_STRONG_INLINE Packet8bf psqrt (const Packet8bf& a){ - BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a); -} -template<> EIGEN_STRONG_INLINE Packet8bf prsqrt (const Packet8bf& a){ - BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt, a); -} template<> EIGEN_STRONG_INLINE Packet8bf pexp (const Packet8bf& a){ BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a); } @@ -1390,9 +1773,11 @@ template<> EIGEN_STRONG_INLINE Packet8bf pceil (const Packet8bf& a){ template<> EIGEN_STRONG_INLINE Packet8bf pround (const Packet8bf& a){ BF16_TO_F32_UNARY_OP_WRAPPER(pround, a); } +#ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet8bf print (const Packet8bf& a){ BF16_TO_F32_UNARY_OP_WRAPPER(print, a); } +#endif template<> EIGEN_STRONG_INLINE Packet8bf pmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) { Packet4f a_even = Bf16ToF32Even(a); Packet4f a_odd = Bf16ToF32Odd(a); @@ -2100,7 +2485,11 @@ ptranspose(PacketBlock& kernel) { template EIGEN_STRONG_INLINE Packet pblend4(const Selector<4>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] }; +#ifdef __POWER8_VECTOR__ + Packet4ui mask = 
reinterpret_cast(vec_neg(reinterpret_cast(select))); +#else Packet4ui mask = reinterpret_cast(vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p4i_ONE))); +#endif return vec_sel(elsePacket, thenPacket, mask); } @@ -2115,7 +2504,11 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons template<> EIGEN_STRONG_INLINE Packet8s pblend(const Selector<8>& ifPacket, const Packet8s& thenPacket, const Packet8s& elsePacket) { Packet8us select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3], ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7] }; +#ifdef __POWER8_VECTOR__ + Packet8us mask = reinterpret_cast(vec_neg(reinterpret_cast(select))); +#else Packet8us mask = reinterpret_cast(vec_cmpeq(select, p8us_ONE)); +#endif Packet8s result = vec_sel(elsePacket, thenPacket, mask); return result; } @@ -2123,7 +2516,11 @@ template<> EIGEN_STRONG_INLINE Packet8s pblend(const Selector<8>& ifPacket, cons template<> EIGEN_STRONG_INLINE Packet8us pblend(const Selector<8>& ifPacket, const Packet8us& thenPacket, const Packet8us& elsePacket) { Packet8us select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3], ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7] }; +#ifdef __POWER8_VECTOR__ + Packet8us mask = reinterpret_cast(vec_neg(reinterpret_cast(select))); +#else Packet8us mask = reinterpret_cast(vec_cmpeq(reinterpret_cast(select), p8us_ONE)); +#endif return vec_sel(elsePacket, thenPacket, mask); } @@ -2137,7 +2534,11 @@ template<> EIGEN_STRONG_INLINE Packet16c pblend(const Selector<16>& ifPacket, co ifPacket.select[8], ifPacket.select[9], ifPacket.select[10], ifPacket.select[11], ifPacket.select[12], ifPacket.select[13], ifPacket.select[14], ifPacket.select[15] }; +#ifdef __POWER8_VECTOR__ + Packet16uc mask = reinterpret_cast(vec_neg(reinterpret_cast(select))); +#else Packet16uc mask = reinterpret_cast(vec_cmpeq(reinterpret_cast(select), p16uc_ONE)); +#endif return vec_sel(elsePacket, thenPacket, mask); } @@ -2147,7 +2548,11 @@ template<> EIGEN_STRONG_INLINE Packet16uc pblend(const Selector<16>& ifPacket, c ifPacket.select[8], ifPacket.select[9], ifPacket.select[10], ifPacket.select[11], ifPacket.select[12], ifPacket.select[13], ifPacket.select[14], ifPacket.select[15] }; +#ifdef __POWER8_VECTOR__ + Packet16uc mask = reinterpret_cast(vec_neg(reinterpret_cast(select))); +#else Packet16uc mask = reinterpret_cast(vec_cmpeq(reinterpret_cast(select), p16uc_ONE)); +#endif return vec_sel(elsePacket, thenPacket, mask); } @@ -2208,7 +2613,7 @@ template<> EIGEN_STRONG_INLINE Packet8us pcast(const Packe Packet4f float_odd = Bf16ToF32Odd(a); Packet4ui int_even = pcast(float_even); Packet4ui int_odd = pcast(float_odd); - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF); Packet4ui low_even = pand(int_even, p4ui_low_mask); Packet4ui low_odd = pand(int_odd, p4ui_low_mask); @@ -2231,7 +2636,7 @@ template<> EIGEN_STRONG_INLINE Packet8us pcast(const Packe template<> EIGEN_STRONG_INLINE Packet8bf pcast(const Packet8us& a) { //short -> int -> float -> bfloat16 - const _EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF); + const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF); Packet4ui int_cast = reinterpret_cast(a); Packet4ui int_even = pand(int_cast, p4ui_low_mask); Packet4ui int_odd = plogical_shift_right<16>(int_cast); @@ -2301,6 +2706,7 @@ template<> struct packet_traits : 
default_packet_traits HasAbs = 1, HasSin = 0, HasCos = 0, + HasATan = 0, HasLog = 0, HasExp = 1, HasSqrt = 1, @@ -2345,12 +2751,22 @@ template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) return vec_xl(0, const_cast(from)); // cast needed by Clang } +template<> EIGEN_ALWAYS_INLINE Packet2d pload_partial(const double* from, const Index n, const Index offset) +{ + return pload_partial_common(from, n, offset); +} + template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vec_xst(from, 0, to); } +template<> EIGEN_ALWAYS_INLINE void pstore_partial(double* to, const Packet2d& from, const Index n, const Index offset) +{ + pstore_partial_common(to, from, n, offset); +} + template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { Packet2d v = {from, from}; return v; @@ -2372,19 +2788,21 @@ pbroadcast4(const double *a, a3 = pset1(a[3]); } -template<> EIGEN_DEVICE_FUNC inline Packet2d pgather(const double* from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet2d pgather(const double* from, Index stride) { - EIGEN_ALIGN16 double af[2]; - af[0] = from[0*stride]; - af[1] = from[1*stride]; - return pload(af); + return pgather_common(from, stride); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet2d& from, Index stride) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet2d pgather_partial(const double* from, Index stride, const Index n) { - EIGEN_ALIGN16 double af[2]; - pstore(af, from); - to[0*stride] = af[0]; - to[1*stride] = af[1]; + return pgather_common(from, stride, n); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter(double* to, const Packet2d& from, Index stride) +{ + pscatter_common(to, from, stride); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pscatter_partial(double* to, const Packet2d& from, Index stride, const Index n) +{ + pscatter_common(to, from, stride, n); } template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return pset1(a) + p2d_COUNTDOWN; } @@ -2393,7 +2811,14 @@ template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b) { return a - b; } -template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; } +template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) +{ +#ifdef __POWER8_VECTOR__ + return vec_neg(a); +#else + return p2d_ZERO - a; +#endif +} template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } @@ -2402,6 +2827,9 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } +template<> EIGEN_STRONG_INLINE Packet2d pmsub(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_msub(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet2d pnmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_nmsub(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet2d pnmsub(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_nmadd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { @@ -2465,6 +2893,11 @@ template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) return vec_xl(0, const_cast(from)); } +template<> EIGEN_ALWAYS_INLINE Packet2d ploadu_partial(const double* 
from, const Index n) +{ + return ploadu_partial_common(from, n); +} + template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) { Packet2d p; @@ -2479,16 +2912,21 @@ template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& vec_xst(from, 0, to); } +template<> EIGEN_ALWAYS_INLINE void pstoreu_partial(double* to, const Packet2d& from, const Index n) +{ + pstoreu_partial_common(to, from, n); +} + template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { EIGEN_ALIGN16 double x[2]; pstore(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { - return reinterpret_cast(vec_perm(reinterpret_cast(a), reinterpret_cast(a), p16uc_REVERSE64)); + return vec_sld(a, a, 8); } template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); } - +template<> EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) { return (Packet2d)vec_sra((Packet2l)a, vec_splats((unsigned long long)(63))); } // VSX support varies between different compilers and even different // versions of the same compiler. For gcc version >= 4.9.3, we can use // vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use @@ -2571,7 +3009,7 @@ template struct plogical_shift_left_impl; template -struct plogical_shift_left_impl= 0)>::type> { +struct plogical_shift_left_impl= 0)>> { static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) { static const unsigned n = static_cast(N); const Packet4ui shift = {n, n, n, n}; @@ -2585,7 +3023,7 @@ struct plogical_shift_left_impl= 0)>::typ }; template -struct plogical_shift_left_impl= 32)>::type> { +struct plogical_shift_left_impl= 32)>> { static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) { static const unsigned m = static_cast(N - 32); const Packet4ui shift = {m, m, m, m}; @@ -2603,7 +3041,7 @@ template struct plogical_shift_right_impl; template -struct plogical_shift_right_impl= 0)>::type> { +struct plogical_shift_right_impl= 0)>> { static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) { static const unsigned n = static_cast(N); const Packet4ui shift = {n, n, n, n}; @@ -2617,7 +3055,7 @@ struct plogical_shift_right_impl= 0)>::ty }; template -struct plogical_shift_right_impl= 32)>::type> { +struct plogical_shift_right_impl= 32)>> { static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) { static const unsigned m = static_cast(N - 32); const Packet4ui shift = {m, m, m, m}; @@ -2690,8 +3128,8 @@ template<> EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { Packet2d t0, t1; - t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI); - t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO); + t0 = vec_mergeh(kernel.packet[0], kernel.packet[1]); + t1 = vec_mergel(kernel.packet[0], kernel.packet[1]); kernel.packet[0] = t0; kernel.packet[1] = t1; } diff --git a/libs/eigen/Eigen/src/Core/arch/Default/BFloat16.h b/libs/eigen/Eigen/src/Core/arch/Default/BFloat16.h index 1c28f4f..d2137d4 100644 --- a/libs/eigen/Eigen/src/Core/arch/Default/BFloat16.h +++ b/libs/eigen/Eigen/src/Core/arch/Default/BFloat16.h @@ -16,6 +16,20 @@ limitations under the License. 
#ifndef EIGEN_BFLOAT16_H #define EIGEN_BFLOAT16_H +#include "../../InternalHeaderCheck.h" + +#if defined(EIGEN_HAS_HIP_BF16) +// When compiling with GPU support, the "hip_bfloat16" base class as well as +// some other routines are defined in the GPU compiler header files +// (hip_bfloat16.h), and they are not tagged constexpr +// As a consequence, we get compile failures when compiling Eigen with +// GPU support. Hence the need to disable EIGEN_CONSTEXPR when building +// Eigen with GPU support + #pragma push_macro("EIGEN_CONSTEXPR") + #undef EIGEN_CONSTEXPR + #define EIGEN_CONSTEXPR +#endif + #define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD) \ template <> \ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED \ @@ -23,19 +37,47 @@ limitations under the License. return F32ToBf16(METHOD(Bf16ToF32(_x))); \ } +// Only use HIP GPU bf16 in kernels +#if defined(EIGEN_HAS_HIP_BF16) && defined(EIGEN_GPU_COMPILE_PHASE) +#define EIGEN_USE_HIP_BF16 +#endif + namespace Eigen { struct bfloat16; +namespace numext { +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast(const uint16_t& src); + +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast(const Eigen::bfloat16& src); +} // namespace numext namespace bfloat16_impl { +#if defined(EIGEN_USE_HIP_BF16) + +struct __bfloat16_raw : public hip_bfloat16 { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {} + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(hip_bfloat16 hb) : hip_bfloat16(hb) {} + explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : hip_bfloat16(raw) {} +}; + +#else + // Make our own __bfloat16_raw definition. struct __bfloat16_raw { +#if defined(EIGEN_HAS_HIP_BF16) && !defined(EIGEN_GPU_COMPILE_PHASE) + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() {} +#else EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {} +#endif explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {} unsigned short value; }; +#endif // defined(EIGEN_USE_HIP_BF16) + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value); template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff); @@ -83,57 +125,114 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base { return bfloat16_impl::bfloat16_to_float(*this); } }; -} // namespace Eigen -namespace std { -template<> -struct numeric_limits { - static const bool is_specialized = true; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const bool has_infinity = true; - static const bool has_quiet_NaN = true; - static const bool has_signaling_NaN = true; - static const float_denorm_style has_denorm = std::denorm_absent; - static const bool has_denorm_loss = false; - static const std::float_round_style round_style = numeric_limits::round_style; - static const bool is_iec559 = false; - static const bool is_bounded = true; - static const bool is_modulo = false; - static const int digits = 8; - static const int digits10 = 2; - static const int max_digits10 = 4; - static const int radix = 2; - static const int min_exponent = numeric_limits::min_exponent; - static const int min_exponent10 = numeric_limits::min_exponent10; - static const int max_exponent = numeric_limits::max_exponent; - static const int max_exponent10 = numeric_limits::max_exponent10; - static const bool traps = numeric_limits::traps; - static const bool 
tinyness_before = numeric_limits::tinyness_before; +// TODO(majnemer): Get rid of this once we can rely on C++17 inline variables to +// solve the ODR issue. +namespace bfloat16_impl { +template +struct numeric_limits_bfloat16_impl { + static EIGEN_CONSTEXPR const bool is_specialized = true; + static EIGEN_CONSTEXPR const bool is_signed = true; + static EIGEN_CONSTEXPR const bool is_integer = false; + static EIGEN_CONSTEXPR const bool is_exact = false; + static EIGEN_CONSTEXPR const bool has_infinity = true; + static EIGEN_CONSTEXPR const bool has_quiet_NaN = true; + static EIGEN_CONSTEXPR const bool has_signaling_NaN = true; + static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present; + static EIGEN_CONSTEXPR const bool has_denorm_loss = false; + static EIGEN_CONSTEXPR const std::float_round_style round_style = std::numeric_limits::round_style; + static EIGEN_CONSTEXPR const bool is_iec559 = true; + // The C++ standard defines this as "true if the set of values representable + // by the type is finite." BFloat16 has finite precision. + static EIGEN_CONSTEXPR const bool is_bounded = true; + static EIGEN_CONSTEXPR const bool is_modulo = false; + static EIGEN_CONSTEXPR const int digits = 8; + static EIGEN_CONSTEXPR const int digits10 = 2; + static EIGEN_CONSTEXPR const int max_digits10 = 4; + static EIGEN_CONSTEXPR const int radix = std::numeric_limits::radix; + static EIGEN_CONSTEXPR const int min_exponent = std::numeric_limits::min_exponent; + static EIGEN_CONSTEXPR const int min_exponent10 = std::numeric_limits::min_exponent10; + static EIGEN_CONSTEXPR const int max_exponent = std::numeric_limits::max_exponent; + static EIGEN_CONSTEXPR const int max_exponent10 = std::numeric_limits::max_exponent10; + static EIGEN_CONSTEXPR const bool traps = std::numeric_limits::traps; + // IEEE754: "The implementer shall choose how tininess is detected, but shall + // detect tininess in the same way for all operations in radix two" + static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits::tinyness_before; - static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); } - static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); } - static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); } - static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); } - static Eigen::bfloat16 round_error() { return Eigen::bfloat16(0x3f00); } - static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); } - static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); } - static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); } - static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); } + static EIGEN_CONSTEXPR Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); } + static EIGEN_CONSTEXPR Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); } + static EIGEN_CONSTEXPR Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); } + static EIGEN_CONSTEXPR Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); } + static EIGEN_CONSTEXPR Eigen::bfloat16 round_error() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3f00); } + static
EIGEN_CONSTEXPR Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); } + static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); } + static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fa0); } + static EIGEN_CONSTEXPR Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); } }; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::is_specialized; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::is_signed; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::is_integer; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::is_exact; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::has_infinity; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::has_quiet_NaN; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::has_signaling_NaN; +template +EIGEN_CONSTEXPR const std::float_denorm_style numeric_limits_bfloat16_impl::has_denorm; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::has_denorm_loss; +template +EIGEN_CONSTEXPR const std::float_round_style numeric_limits_bfloat16_impl::round_style; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::is_iec559; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::is_bounded; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::is_modulo; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::digits; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::digits10; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::max_digits10; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::radix; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::min_exponent; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::min_exponent10; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::max_exponent; +template +EIGEN_CONSTEXPR const int numeric_limits_bfloat16_impl::max_exponent10; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::traps; +template +EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl::tinyness_before; +} // end namespace bfloat16_impl +} // end namespace Eigen + +namespace std { // If std::numeric_limits is specialized, should also specialize // std::numeric_limits, std::numeric_limits, and // std::numeric_limits // https://stackoverflow.com/a/16519653/ template<> -struct numeric_limits : numeric_limits {}; +class numeric_limits : public Eigen::bfloat16_impl::numeric_limits_bfloat16_impl<> {}; template<> -struct numeric_limits : numeric_limits {}; +class numeric_limits : public numeric_limits {}; template<> -struct numeric_limits : numeric_limits {}; -} // namespace std +class numeric_limits : public numeric_limits {}; +template<> +class numeric_limits : public numeric_limits {}; +} // end namespace std namespace Eigen { @@ -148,7 +247,7 @@ namespace bfloat16_impl { // We need to provide emulated *host-side* BF16 operators for clang. #pragma push_macro("EIGEN_DEVICE_FUNC") #undef EIGEN_DEVICE_FUNC -#if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16) +#if (defined(EIGEN_HAS_GPU_BF16) && defined(EIGEN_HAS_NATIVE_BF16)) #define EIGEN_DEVICE_FUNC __host__ #else // both host and device need emulated ops. 
#define EIGEN_DEVICE_FUNC __host__ __device__ @@ -177,9 +276,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, co return bfloat16(float(a) / float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) { - bfloat16 result; - result.value = a.value ^ 0x8000; - return result; + numext::uint16_t x = numext::bit_cast(a) ^ 0x8000; + return numext::bit_cast(x); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) { a = bfloat16(float(a) + float(b)); @@ -246,38 +344,47 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, In } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) { +#if defined(EIGEN_USE_HIP_BF16) + return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(v, __bfloat16_raw::truncate)); +#else __bfloat16_raw output; - if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) { + if (numext::isnan EIGEN_NOT_A_MACRO(v)) { output.value = std::signbit(v) ? 0xFFC0: 0x7FC0; return output; } - const uint16_t* p = reinterpret_cast(&v); -#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - output.value = p[0]; -#else - output.value = p[1]; -#endif + output.value = static_cast(numext::bit_cast(v) >> 16); return output; +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) { +#if defined(EIGEN_USE_HIP_BF16) + __bfloat16_raw bf; + bf.data = value; + return bf; +#else return __bfloat16_raw(value); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) { +#if defined(EIGEN_USE_HIP_BF16) + return bf.data; +#else return bf.value; +#endif } // float_to_bfloat16_rtne template specialization that does not make any // assumption about the value of its function argument (ff). template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff) { -#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16)) - // Nothing to do here +#if defined(EIGEN_USE_HIP_BF16) + return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff)); #else __bfloat16_raw output; - if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) { + if (numext::isnan EIGEN_NOT_A_MACRO(ff)) { // If the value is a NaN, squash it to a qNaN with msb of fraction set, // this makes sure after truncation we don't end up with an inf. 
// @@ -446,8 +553,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff) { -#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16)) - // Nothing to do here +#if defined(EIGEN_USE_HIP_BF16) + return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff)); #else numext::uint32_t input = numext::bit_cast(ff); __bfloat16_raw output; @@ -462,36 +569,41 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(&result); -#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - q[0] = h.value; +#if defined(EIGEN_USE_HIP_BF16) + return static_cast(h); #else - q[1] = h.value; + return numext::bit_cast(static_cast(h.value) << 16); #endif - return result; } + // --- standard functions --- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) { EIGEN_USING_STD(isinf); +#if defined(EIGEN_USE_HIP_BF16) + return (isinf)(a); // Uses HIP hip_bfloat16 isinf operator +#else return (isinf)(float(a)); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) { EIGEN_USING_STD(isnan); +#if defined(EIGEN_USE_HIP_BF16) + return (isnan)(a); // Uses HIP hip_bfloat16 isnan operator +#else return (isnan)(float(a)); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) { return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) { - bfloat16 result; - result.value = a.value & 0x7FFF; - return result; + numext::uint16_t x = numext::bit_cast(a) & 0x7FFF; + return numext::bit_cast(x); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) { - return bfloat16(::expf(float(a))); + return bfloat16(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) { return bfloat16(numext::expm1(float(a))); @@ -509,11 +621,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) { return bfloat16(static_cast(EIGEN_LOG2E) * ::logf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) { - return bfloat16(::sqrtf(float(a))); + return bfloat16(::sqrtf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) { return bfloat16(::powf(float(a), float(b))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan2(const bfloat16& a, const bfloat16& b) { + return bfloat16(::atan2f(float(a), float(b))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) { return bfloat16(::sinf(float(a))); } @@ -541,7 +656,6 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) { return bfloat16(::tanhf(float(a))); } -#if EIGEN_HAS_CXX11_MATH EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) { return bfloat16(::asinhf(float(a))); } @@ -551,7 +665,6 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) { return bfloat16(::atanhf(float(a))); } -#endif EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) { return bfloat16(::floorf(float(a))); } @@ -573,6 +686,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bf const float f2 = static_cast(b); return f2 < f1 ? 
b : a; } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) { const float f1 = static_cast(a); const float f2 = static_cast(b); @@ -584,6 +698,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfl const float f2 = static_cast(b); return bfloat16(::fminf(f1, f2)); } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) { const float f1 = static_cast(a); const float f2 = static_cast(b); @@ -633,7 +748,6 @@ template<> struct NumTraits } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() { return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f); - } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() { return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F); @@ -651,6 +765,11 @@ } // namespace Eigen + +#if defined(EIGEN_HAS_HIP_BF16) + #pragma pop_macro("EIGEN_CONSTEXPR") +#endif + namespace Eigen { namespace numext { @@ -674,7 +793,7 @@ bool (isfinite)(const Eigen::bfloat16& h) { template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast(const uint16_t& src) { - return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src)); + return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src); } template <> @@ -696,5 +815,49 @@ struct hash { } // namespace std #endif +// Add the missing shfl* intrinsics. +// The __shfl* functions are only valid on HIP or __CUDA_ARCH__ >= 300. +// CUDA defines them for (__CUDA_ARCH__ >= 300 || !defined(__CUDA_ARCH__)) +// +// HIP and CUDA prior to SDK 9.0 define +// __shfl, __shfl_up, __shfl_down, __shfl_xor for int and float +// CUDA since 9.0 deprecates those and instead defines +// __shfl_sync, __shfl_up_sync, __shfl_down_sync, __shfl_xor_sync, +// with native support for __half and __nv_bfloat16 +// +// Note that the following are __device__-only functions.
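A scalar aside, not part of the patch: the wrappers guarded below move the 16 payload bits of a bfloat16 through the integer-only __shfl* primitives by widening to int with bit_cast and narrowing back afterwards. A minimal host-side model of that round trip, with an illustrative function name and the shuffle itself stubbed out:

#include <cassert>
#include <cstdint>

// Move a 16-bit payload through an int-only channel, as the bfloat16 __shfl
// wrappers do on device: widen losslessly, transport, then truncate back.
inline std::uint16_t through_int_channel(std::uint16_t payload_bits) {
  const int widened = static_cast<int>(payload_bits);  // lossless widen to int
  const int shuffled = widened;                        // stand-in for __shfl(widened, srcLane, width)
  return static_cast<std::uint16_t>(shuffled);         // low 16 bits restore the payload
}

int main() {
  assert(through_int_channel(0x3F80u) == 0x3F80u);     // 0x3F80 is the bit pattern of bfloat16(1.0f)
}

The narrowing cast is safe because the shuffle only transports the value between lanes; it never does arithmetic on it.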
+#if defined(EIGEN_HIPCC) + +#if defined(EIGEN_HAS_HIP_BF16) + +__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl(Eigen::bfloat16 var, int srcLane, int width=warpSize) { + const int ivar = static_cast(Eigen::numext::bit_cast(var)); + return Eigen::numext::bit_cast(static_cast(__shfl(ivar, srcLane, width))); +} + +__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_up(Eigen::bfloat16 var, unsigned int delta, int width=warpSize) { + const int ivar = static_cast(Eigen::numext::bit_cast(var)); + return Eigen::numext::bit_cast(static_cast(__shfl_up(ivar, delta, width))); +} + +__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_down(Eigen::bfloat16 var, unsigned int delta, int width=warpSize) { + const int ivar = static_cast(Eigen::numext::bit_cast(var)); + return Eigen::numext::bit_cast(static_cast(__shfl_down(ivar, delta, width))); +} + +__device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_xor(Eigen::bfloat16 var, int laneMask, int width=warpSize) { + const int ivar = static_cast(Eigen::numext::bit_cast(var)); + return Eigen::numext::bit_cast(static_cast(__shfl_xor(ivar, laneMask, width))); +} + +#endif // HIP + +#endif // __shfl* + +#if defined(EIGEN_HIPCC) +EIGEN_STRONG_INLINE __device__ Eigen::bfloat16 __ldg(const Eigen::bfloat16* ptr) { + return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(__ldg(Eigen::numext::bit_cast(ptr))); +} +#endif // __ldg #endif // EIGEN_BFLOAT16_H diff --git a/libs/eigen/Eigen/src/Core/arch/Default/ConjHelper.h b/libs/eigen/Eigen/src/Core/arch/Default/ConjHelper.h index 53830b5..6b5afe3 100644 --- a/libs/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +++ b/libs/eigen/Eigen/src/Core/arch/Default/ConjHelper.h @@ -38,6 +38,8 @@ } \ }; +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index c9fbaf6..3060214 100644 --- a/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -16,6 +16,8 @@ #ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H #define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -26,11 +28,11 @@ template<> struct make_integer { typedef numext::int64_t type; }; template<> struct make_integer { typedef numext::int16_t type; }; template<> struct make_integer { typedef numext::int16_t type; }; -template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic_get_biased_exponent(const Packet& a) { typedef typename unpacket_traits::type Scalar; typedef typename unpacket_traits::integer_packet PacketI; - enum { mantissa_bits = numext::numeric_limits::digits - 1}; + static constexpr int mantissa_bits = numext::numeric_limits::digits - 1; return pcast(plogical_shift_right(preinterpret(pabs(a)))); } @@ -40,42 +42,41 @@ template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet& a, Packet& exponent) { typedef typename unpacket_traits::type Scalar; typedef typename make_unsigned::type>::type ScalarUI; - enum { + static constexpr int TotalBits = sizeof(Scalar) * CHAR_BIT, MantissaBits = numext::numeric_limits::digits - 1, - ExponentBits = int(TotalBits) - int(MantissaBits) - 1 - }; + ExponentBits = TotalBits - MantissaBits - 1; - EIGEN_CONSTEXPR ScalarUI scalar_sign_mantissa_mask = - ~(((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1)) << 
int(MantissaBits)); // ~0x7f800000 - const Packet sign_mantissa_mask = pset1frombits(static_cast(scalar_sign_mantissa_mask)); + EIGEN_CONSTEXPR ScalarUI scalar_sign_mantissa_mask = + ~(((ScalarUI(1) << ExponentBits) - ScalarUI(1)) << MantissaBits); // ~0x7f800000 + const Packet sign_mantissa_mask = pset1frombits(static_cast(scalar_sign_mantissa_mask)); const Packet half = pset1(Scalar(0.5)); const Packet zero = pzero(a); const Packet normal_min = pset1((numext::numeric_limits::min)()); // Minimum normal value, 2^-126 - + // To handle denormals, normalize by multiplying by 2^(int(MantissaBits)+1). const Packet is_denormal = pcmp_lt(pabs(a), normal_min); - EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(int(MantissaBits) + 1); // 24 + EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(MantissaBits + 1); // 24 // The following cannot be constexpr because bfloat16(uint16_t) is not constexpr. const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) << int(scalar_normalization_offset)); // 2^24 - const Packet normalization_factor = pset1(scalar_normalization_factor); + const Packet normalization_factor = pset1(scalar_normalization_factor); const Packet normalized_a = pselect(is_denormal, pmul(a, normalization_factor), a); - + // Determine exponent offset: -126 if normal, -126-24 if denormal - const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(int(ExponentBits)-1)) - ScalarUI(2)); // -126 + const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(ExponentBits-1)) - ScalarUI(2)); // -126 Packet exponent_offset = pset1(scalar_exponent_offset); const Packet normalization_offset = pset1(-Scalar(scalar_normalization_offset)); // -24 exponent_offset = pselect(is_denormal, padd(exponent_offset, normalization_offset), exponent_offset); - + // Determine exponent and mantissa from normalized_a. 
exponent = pfrexp_generic_get_biased_exponent(normalized_a); // Zero, Inf and NaN return 'a' unmodified, exponent is zero // (technically the exponent is unspecified for inf/NaN, but GCC/Clang set it to zero) - const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1)); // 255 + const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) << ExponentBits) - ScalarUI(1)); // 255 const Packet non_finite_exponent = pset1(scalar_non_finite_exponent); const Packet is_zero_or_not_finite = por(pcmp_eq(a, zero), pcmp_eq(exponent, non_finite_exponent)); const Packet m = pselect(is_zero_or_not_finite, a, por(pand(normalized_a, sign_mantissa_mask), half)); - exponent = pselect(is_zero_or_not_finite, zero, padd(exponent, exponent_offset)); + exponent = pselect(is_zero_or_not_finite, zero, padd(exponent, exponent_offset)); return m; } @@ -108,25 +109,24 @@ Packet pldexp_generic(const Packet& a, const Packet& exponent) { typedef typename unpacket_traits::integer_packet PacketI; typedef typename unpacket_traits::type Scalar; typedef typename unpacket_traits::type ScalarI; - enum { + static constexpr int TotalBits = sizeof(Scalar) * CHAR_BIT, MantissaBits = numext::numeric_limits::digits - 1, - ExponentBits = int(TotalBits) - int(MantissaBits) - 1 - }; + ExponentBits = TotalBits - MantissaBits - 1; - const Packet max_exponent = pset1(Scalar((ScalarI(1)<((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1)); // 127 + const Packet max_exponent = pset1(Scalar((ScalarI(1)<((ScalarI(1)<<(ExponentBits-1)) - ScalarI(1)); // 127 const PacketI e = pcast(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent)); PacketI b = parithmetic_shift_right<2>(e); // floor(e/4); - Packet c = preinterpret(plogical_shift_left(padd(b, bias))); // 2^b + Packet c = preinterpret(plogical_shift_left(padd(b, bias))); // 2^b Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b) b = psub(psub(psub(e, b), b), b); // e - 3b - c = preinterpret(plogical_shift_left(padd(b, bias))); // 2^(e-3*b) + c = preinterpret(plogical_shift_left(padd(b, bias))); // 2^(e-3*b) out = pmul(out, c); return out; } -// Explicitly multiplies +// Explicitly multiplies // a * (2^e) // clamping e to the range // [NumTraits::min_exponent()-2, NumTraits::max_exponent()] @@ -140,20 +140,19 @@ struct pldexp_fast_impl { typedef typename unpacket_traits::integer_packet PacketI; typedef typename unpacket_traits::type Scalar; typedef typename unpacket_traits::type ScalarI; - enum { + static constexpr int TotalBits = sizeof(Scalar) * CHAR_BIT, MantissaBits = numext::numeric_limits::digits - 1, - ExponentBits = int(TotalBits) - int(MantissaBits) - 1 - }; - + ExponentBits = TotalBits - MantissaBits - 1; + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet run(const Packet& a, const Packet& exponent) { - const Packet bias = pset1(Scalar((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1))); // 127 - const Packet limit = pset1(Scalar((ScalarI(1)<(Scalar((ScalarI(1)<<(ExponentBits-1)) - ScalarI(1))); // 127 + const Packet limit = pset1(Scalar((ScalarI(1)<(pmin(pmax(padd(exponent, bias), pzero(limit)), limit)); // exponent + 127 // return a * (2^e) - return pmul(a, preinterpret(plogical_shift_left(e))); + return pmul(a, preinterpret(plogical_shift_left(e))); } }; @@ -165,36 +164,16 @@ struct pldexp_fast_impl { // polynomial interpolants -> ... -> profit! 
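A scalar sketch, not part of the patch, of the decomposition the rewritten plog_impl_float below implements; std::log1p stands in for the new rational (3,3) approximant and the function name is illustrative only:

#include <cassert>
#include <cmath>

// Scalar model of the vector code: split x = m * 2^e with m in [0.5, 1),
// fold m into [sqrt(1/2), sqrt(2)) so the interpolation interval is centered
// on 1, then recombine log(x) = e*ln(2) + log(1 + r). x is assumed positive
// and finite.
inline float log_by_range_reduction(float x) {
  int e;
  float m = std::frexp(x, &e);       // x = m * 2^e, m in [0.5, 1)
  if (m < 0.7071067811865476f) {     // m < sqrt(1/2): double m ...
    m += m;
    e -= 1;                          // ... and take the factor of 2 back out of e
  }
  const float r = m - 1.0f;          // centered argument in [sqrt(1/2)-1, sqrt(2)-1]
  return std::log1p(r) + static_cast<float>(e) * 0.69314718f;  // e*ln(2) + log(1+r)
}

int main() {
  assert(std::fabs(log_by_range_reduction(10.0f) - std::log(10.0f)) < 1e-6f);
}

Doubling m whenever it falls below sqrt(1/2), and compensating in e, is essentially what the branch-free pcmp_lt/pand mask sequence in the packet code accomplishes.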
template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog_impl_float(const Packet _x) { - Packet x = _x; - const Packet cst_1 = pset1(1.0f); - const Packet cst_neg_half = pset1(-0.5f); - // The smallest non denormalized float number. - const Packet cst_min_norm_pos = pset1frombits( 0x00800000u); - const Packet cst_minus_inf = pset1frombits( 0xff800000u); - const Packet cst_pos_inf = pset1frombits( 0x7f800000u); + const Packet cst_minus_inf = pset1frombits(static_cast(0xff800000u)); + const Packet cst_pos_inf = pset1frombits(static_cast(0x7f800000u)); - // Polynomial coefficients. const Packet cst_cephes_SQRTHF = pset1(0.707106781186547524f); - const Packet cst_cephes_log_p0 = pset1(7.0376836292E-2f); - const Packet cst_cephes_log_p1 = pset1(-1.1514610310E-1f); - const Packet cst_cephes_log_p2 = pset1(1.1676998740E-1f); - const Packet cst_cephes_log_p3 = pset1(-1.2420140846E-1f); - const Packet cst_cephes_log_p4 = pset1(+1.4249322787E-1f); - const Packet cst_cephes_log_p5 = pset1(-1.6668057665E-1f); - const Packet cst_cephes_log_p6 = pset1(+2.0000714765E-1f); - const Packet cst_cephes_log_p7 = pset1(-2.4999993993E-1f); - const Packet cst_cephes_log_p8 = pset1(+3.3333331174E-1f); - - // Truncate input values to the minimum positive normal. - x = pmax(x, cst_min_norm_pos); - - Packet e; + Packet e, x; // extract significant in the range [0.5,1) and exponent - x = pfrexp(x,e); + x = pfrexp(_x,e); // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2)) // and shift by -1. The values are then centered around 0, which improves @@ -209,24 +188,22 @@ Packet plog_impl_float(const Packet _x) e = psub(e, pand(cst_1, mask)); x = padd(x, tmp); - Packet x2 = pmul(x, x); - Packet x3 = pmul(x2, x); + // Polynomial coefficients for rational (3,3) r(x) = p(x)/q(x) + // approximating log(1+x) on [sqrt(0.5)-1;sqrt(2)-1]. + const Packet cst_p1 = pset1(1.0000000190281136f); + const Packet cst_p2 = pset1(1.0000000190281063f); + const Packet cst_p3 = pset1(0.18256296349849254f); + const Packet cst_q1 = pset1(1.4999999999999927f); + const Packet cst_q2 = pset1(0.59923249590823520f); + const Packet cst_q3 = pset1(0.049616247954120038f); - // Evaluate the polynomial approximant of degree 8 in three parts, probably - // to improve instruction-level parallelism. - Packet y, y1, y2; - y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1); - y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4); - y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7); - y = pmadd(y, x, cst_cephes_log_p2); - y1 = pmadd(y1, x, cst_cephes_log_p5); - y2 = pmadd(y2, x, cst_cephes_log_p8); - y = pmadd(y, x3, y1); - y = pmadd(y, x3, y2); - y = pmul(y, x3); - - y = pmadd(cst_neg_half, x2, y); - x = padd(x, y); + Packet p = pmadd(x, cst_p3, cst_p2); + p = pmadd(x, p, cst_p1); + p = pmul(x, p); + Packet q = pmadd(x, cst_q3, cst_q2); + q = pmadd(x, q, cst_q1); + q = pmadd(x, q, cst_1); + x = pdiv(p, q); // Add the logarithm of the exponent back to the result of the interpolation. 
if (base2) { @@ -250,7 +227,6 @@ Packet plog_impl_float(const Packet _x) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog_float(const Packet _x) { return plog_impl_float(_x); @@ -258,7 +234,6 @@ Packet plog_float(const Packet _x) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog2_float(const Packet _x) { return plog_impl_float(_x); @@ -275,15 +250,12 @@ Packet plog2_float(const Packet _x) */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog_impl_double(const Packet _x) { Packet x = _x; const Packet cst_1 = pset1(1.0); const Packet cst_neg_half = pset1(-0.5); - // The smallest non denormalized double. - const Packet cst_min_norm_pos = pset1frombits( static_cast(0x0010000000000000ull)); const Packet cst_minus_inf = pset1frombits( static_cast(0xfff0000000000000ull)); const Packet cst_pos_inf = pset1frombits( static_cast(0x7ff0000000000000ull)); @@ -305,9 +277,6 @@ Packet plog_impl_double(const Packet _x) const Packet cst_cephes_log_q4 = pset1(7.11544750618563894466E1); const Packet cst_cephes_log_q5 = pset1(2.31251620126765340583E1); - // Truncate input values to the minimum positive normal. - x = pmax(x, cst_min_norm_pos); - Packet e; // extract significant in the range [0.5,1) and exponent x = pfrexp(x,e); @@ -371,7 +340,6 @@ Packet plog_impl_double(const Packet _x) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog_double(const Packet _x) { return plog_impl_double(_x); @@ -379,7 +347,6 @@ Packet plog_double(const Packet _x) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog2_double(const Packet _x) { return plog_impl_double(_x); @@ -433,26 +400,27 @@ Packet generic_expm1(const Packet& x) // Exponential function. Works by writing "x = m*log(2) + r" where // "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then // "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1). +// exp(r) is computed using a 6th order minimax polynomial approximation. template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet pexp_float(const Packet _x) { - const Packet cst_1 = pset1(1.0f); + const Packet cst_zero = pset1(0.0f); + const Packet cst_one = pset1(1.0f); const Packet cst_half = pset1(0.5f); - const Packet cst_exp_hi = pset1( 88.723f); - const Packet cst_exp_lo = pset1(-88.723f); + const Packet cst_exp_hi = pset1(88.723f); + const Packet cst_exp_lo = pset1(-104.f); const Packet cst_cephes_LOG2EF = pset1(1.44269504088896341f); - const Packet cst_cephes_exp_p0 = pset1(1.9875691500E-4f); - const Packet cst_cephes_exp_p1 = pset1(1.3981999507E-3f); - const Packet cst_cephes_exp_p2 = pset1(8.3334519073E-3f); - const Packet cst_cephes_exp_p3 = pset1(4.1665795894E-2f); - const Packet cst_cephes_exp_p4 = pset1(1.6666665459E-1f); - const Packet cst_cephes_exp_p5 = pset1(5.0000001201E-1f); + const Packet cst_p2 = pset1(0.49999988079071044921875f); + const Packet cst_p3 = pset1(0.16666518151760101318359375f); + const Packet cst_p4 = pset1(4.166965186595916748046875e-2f); + const Packet cst_p5 = pset1(8.36894474923610687255859375e-3f); + const Packet cst_p6 = pset1(1.37449637986719608306884765625e-3f); // Clamp x. - Packet x = pmax(pmin(_x, cst_exp_hi), cst_exp_lo); + Packet zero_mask = pcmp_lt(_x, cst_exp_lo); + Packet x = pmin(_x, cst_exp_hi); // Express exp(x) as exp(m*ln(2) + r), start by extracting // m = floor(x/ln(2) + 0.5). 
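A scalar model of the scheme the comment above describes: write x = m*ln(2) + r with m = floor(x/ln(2) + 1/2), evaluate the new degree-6 polynomial for exp(r) on [-ln(2)/2, ln(2)/2], then scale by 2^m. The hi/lo split of ln(2) uses the usual Cephes constants, which the surrounding unchanged lines are assumed to still define as cst_cephes_exp_C1/C2; std::ldexp stands in for pldexp, and the clamping and zero-mask handling are elided.

#include <cmath>
#include <cstdio>

static float exp_sketch(float x) {
  const float LOG2E  = 1.44269504088896341f;
  const float LN2_HI = -0.693359375f;           // assumed cst_cephes_exp_C1
  const float LN2_LO = 2.12194440e-4f;          // assumed cst_cephes_exp_C2
  const float m = std::floor(x * LOG2E + 0.5f);
  float r = m * LN2_HI + x;                     // r = x - m*ln(2), computed
  r = m * LN2_LO + r;                           // in two steps for precision
  const float r2 = r * r;
  float p_even = r2 * 1.37449637986719608306884765625e-3f + 4.166965186595916748046875e-2f;
  const float p_odd = r2 * 8.36894474923610687255859375e-3f + 0.16666518151760101318359375f;
  p_even = r2 * p_even + 0.49999988079071044921875f;
  float y = r * p_odd + p_even;                 // odd/even split as in the packet code
  y = r2 * y + (r + 1.0f);                      // exp(r) ~= 1 + r + r^2 * y
  return std::ldexp(y, static_cast<int>(m));    // 2^m * exp(r)
}

int main() { std::printf("%.6f vs %.6f\n", exp_sketch(3.5f), std::exp(3.5f)); }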
@@ -466,31 +434,27 @@ Packet pexp_float(const Packet _x) Packet r = pmadd(m, cst_cephes_exp_C1, x); r = pmadd(m, cst_cephes_exp_C2, r); - Packet r2 = pmul(r, r); - Packet r3 = pmul(r2, r); - - // Evaluate the polynomial approximant,improved by instruction-level parallelism. - Packet y, y1, y2; - y = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1); - y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4); - y2 = padd(r, cst_1); - y = pmadd(y, r, cst_cephes_exp_p2); - y1 = pmadd(y1, r, cst_cephes_exp_p5); - y = pmadd(y, r3, y1); - y = pmadd(y, r2, y2); + // Evaluate the 6th order polynomial approximation to exp(r) + // with r in the interval [-ln(2)/2;ln(2)/2]. + const Packet r2 = pmul(r, r); + Packet p_even = pmadd(r2, cst_p6, cst_p4); + const Packet p_odd = pmadd(r2, cst_p5, cst_p3); + p_even = pmadd(r2, p_even, cst_p2); + const Packet p_low = padd(r, cst_one); + Packet y = pmadd(r, p_odd, p_even); + y = pmadd(r2, y, p_low); // Return 2^m * exp(r). // TODO: replace pldexp with faster implementation since y in [-1, 1). - return pmax(pldexp(y,m), _x); + return pselect(zero_mask, cst_zero, pmax(pldexp(y,m), _x)); } template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet pexp_double(const Packet _x) { Packet x = _x; - + const Packet cst_zero = pset1(0.0); const Packet cst_1 = pset1(1.0); const Packet cst_2 = pset1(2.0); const Packet cst_half = pset1(0.5); @@ -512,7 +476,8 @@ Packet pexp_double(const Packet _x) Packet tmp, fx; // clamp x - x = pmax(pmin(x, cst_exp_hi), cst_exp_lo); + Packet zero_mask = pcmp_lt(_x, cst_exp_lo); + x = pmin(x, cst_exp_hi); // Express exp(x) as exp(g + n*log(2)). fx = pmadd(cst_cephes_LOG2EF, x, cst_half); @@ -550,7 +515,7 @@ Packet pexp_double(const Packet _x) // Construct the result 2^n * exp(g) = e * x. The max is used to catch // non-finite values in the input. // TODO: replace pldexp with faster implementation since x in [-1, 1). - return pmax(pldexp(x,fx), _x); + return pselect(zero_mask, cst_zero, pmax(pldexp(x,fx), _x)); } // The following code is inspired by the following stack-overflow answer: @@ -562,7 +527,7 @@ Packet pexp_double(const Packet _x) // aligned on 8-bits, and (2) replicating the storage of the bits of 2/pi. // - Avoid a branch in rounding and extraction of the remaining fractional part. // Overall, I measured a speed up higher than x2 on x86-64. -inline float trig_reduce_huge (float xf, int *quadrant) +inline float trig_reduce_huge (float xf, Eigen::numext::int32_t *quadrant) { using Eigen::numext::int32_t; using Eigen::numext::uint32_t; @@ -570,7 +535,7 @@ inline float trig_reduce_huge (float xf, int *quadrant) using Eigen::numext::uint64_t; const double pio2_62 = 3.4061215800865545e-19; // pi/2 * 2^-62 - const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point foramt + const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point format // 192 bits of 2/pi for Payne-Hanek reduction // Bits are introduced by packet of 8 to enable aligned reads. 
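For orientation, this is what trig_reduce_huge computes, modeled in plain double precision: the quadrant index round(x * 2/pi) mod 4 and the remainder x - k*pi/2. The real routine obtains the same pair exactly for arbitrarily large floats via the 2.62 fixed-point product with the 2/pi table; the double version below is only an illustrative stand-in, valid for moderate |x|.

#include <cmath>
#include <cstdint>
#include <cstdio>

static float trig_reduce_model(float xf, std::int32_t* quadrant) {
  const double two_over_pi = 0.63661977236758134308;
  const double pi_over_two = 1.57079632679489661923;
  const double x = static_cast<double>(xf);
  const double k = std::nearbyint(x * two_over_pi);       // nearest multiple of pi/2
  *quadrant = static_cast<std::int32_t>(std::fmod(k, 4.0));
  return static_cast<float>(x - k * pi_over_two);         // remainder in [-pi/4, pi/4]
}

int main() {
  std::int32_t q;
  const float r = trig_reduce_model(1.0e4f, &q);
  std::printf("quadrant=%d remainder=%f\n", q, r);
}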
@@ -618,8 +583,7 @@ inline float trig_reduce_huge (float xf, int *quadrant) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED -#if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT +#if EIGEN_COMP_GNUC_STRICT __attribute__((optimize("-fno-unsafe-math-optimizations"))) #endif Packet psincos_float(const Packet& _x) @@ -629,20 +593,20 @@ Packet psincos_float(const Packet& _x) const Packet cst_2oPI = pset1(0.636619746685028076171875f); // 2/PI const Packet cst_rounding_magic = pset1(12582912); // 2^23 for rounding const PacketI csti_1 = pset1(1); - const Packet cst_sign_mask = pset1frombits(0x80000000u); + const Packet cst_sign_mask = pset1frombits(static_cast(0x80000000u)); Packet x = pabs(_x); // Scale x by 2/Pi to find x's octant. Packet y = pmul(x, cst_2oPI); - // Rounding trick: + // Rounding trick to find nearest integer: Packet y_round = padd(y, cst_rounding_magic); EIGEN_OPTIMIZATION_BARRIER(y_round) PacketI y_int = preinterpret(y_round); // last 23 digits represent integer (if abs(x)<2^24) - y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi + y = psub(y_round, cst_rounding_magic); // nearest integer to x * (2/pi) - // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4 + // Subtract y * Pi/2 to reduce x to the interval -Pi/4 <= x <= +Pi/4 // using "Extended precision modular arithmetic" #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) // This version requires true FMA for high accuracy @@ -685,7 +649,7 @@ Packet psincos_float(const Packet& _x) const int PacketSize = unpacket_traits::size; EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float vals[PacketSize]; EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float x_cpy[PacketSize]; - EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) int y_int2[PacketSize]; + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Eigen::numext::int32_t y_int2[PacketSize]; pstoreu(vals, pabs(_x)); pstoreu(x_cpy, x); pstoreu(y_int2, y_int); @@ -743,7 +707,6 @@ Packet psincos_float(const Packet& _x) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet psin_float(const Packet& x) { return psincos_float(x); @@ -751,16 +714,268 @@ Packet psin_float(const Packet& x) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet pcos_float(const Packet& x) { return psincos_float(x); } +// Generic implementation of acos(x). +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pacos_float(const Packet& x_in) { + typedef typename unpacket_traits::type Scalar; + static_assert(std::is_same::value, "Scalar type must be float"); + + const Packet cst_one = pset1(Scalar(1)); + const Packet cst_pi = pset1(Scalar(EIGEN_PI)); + const Packet p6 = pset1(Scalar(2.26911413483321666717529296875e-3)); + const Packet p5 = pset1(Scalar(-1.1063250713050365447998046875e-2)); + const Packet p4 = pset1(Scalar(2.680264413356781005859375e-2)); + const Packet p3 = pset1(Scalar(-4.87488098442554473876953125e-2)); + const Packet p2 = pset1(Scalar(8.874166011810302734375e-2)); + const Packet p1 = pset1(Scalar(-0.2145837843418121337890625)); + const Packet p0 = pset1(Scalar(1.57079613208770751953125)); + + // For x in [0:1], we approximate acos(x)/sqrt(1-x), which is a smooth + // function, by a 6'th order polynomial. + // For x in [-1:0) we use that acos(-x) = pi - acos(x). + const Packet neg_mask = pcmp_lt(x_in, pzero(x_in)); + Packet x = pabs(x_in); + const Packet invalid_mask = pcmp_lt(pset1(1.0f), x); + + // Evaluate the polynomial using Horner's rule: + // P(x) = p0 + x * (p1 + x * (p2 + ... (p5 + x * p6)) ... ) . 
+ // We evaluate even and odd terms independently to increase + // instruction level parallelism. + Packet x2 = pmul(x_in,x_in); + Packet p_even = pmadd(p6, x2, p4); + Packet p_odd = pmadd(p5, x2, p3); + p_even = pmadd(p_even, x2, p2); + p_odd = pmadd(p_odd, x2, p1); + p_even = pmadd(p_even, x2, p0); + Packet p = pmadd(p_odd, x, p_even); + + // The polynomial approximates acos(x)/sqrt(1-x), so + // multiply by sqrt(1-x) to get acos(x). + Packet denom = psqrt(psub(cst_one, x)); + Packet result = pmul(denom, p); + + // Undo mapping for negative arguments. + result = pselect(neg_mask, psub(cst_pi, result), result); + // Return NaN for arguments outside [-1:1]. + return pselect(invalid_mask, + pset1(std::numeric_limits::quiet_NaN()), + result); +} + +// Generic implementation of asin(x). +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pasin_float(const Packet& x_in) { + typedef typename unpacket_traits::type Scalar; + static_assert(std::is_same::value, "Scalar type must be float"); + + // For |x| < 0.5 approximate asin(x)/x by an 8th order polynomial with + // even terms only. + const Packet p9 = pset1(Scalar(5.08838854730129241943359375e-2f)); + const Packet p7 = pset1(Scalar(3.95139865577220916748046875e-2f)); + const Packet p5 = pset1(Scalar(7.550220191478729248046875e-2f)); + const Packet p3 = pset1(Scalar(0.16664917767047882080078125f)); + const Packet p1 = pset1(Scalar(1.00000011920928955078125f)); + + const Packet neg_mask = pcmp_lt(x_in, pzero(x_in)); + Packet x = pabs(x_in); + const Packet invalid_mask = pcmp_lt(pset1(1.0f), x); + // For arguments |x| > 0.5, we map x back to [0:0.5] using + // the transformation x_large = sqrt(0.5*(1-x)), and use the + // identity + // asin(x) = pi/2 - 2 * asin( sqrt( 0.5 * (1 - x))) + const Packet cst_half = pset1(Scalar(0.5f)); + const Packet cst_two = pset1(Scalar(2)); + Packet x_large = psqrt(pnmadd(cst_half, x, cst_half)); + const Packet large_mask = pcmp_lt(cst_half, x); + x = pselect(large_mask, x_large, x); + + // Compute polynomial. + // x * (p1 + x^2*(p3 + x^2*(p5 + x^2*(p7 + x^2*p9)))) + Packet x2 = pmul(x, x); + Packet p = pmadd(p9, x2, p7); + p = pmadd(p, x2, p5); + p = pmadd(p, x2, p3); + p = pmadd(p, x2, p1); + p = pmul(p, x); + + constexpr float kPiOverTwo = static_cast(EIGEN_PI/2); + Packet p_large = pnmadd(cst_two, p, pset1(kPiOverTwo)); + p = pselect(large_mask, p_large, p); + // Flip the sign for negative arguments. + p = pselect(neg_mask, pnegate(p), p); + + // Return NaN for arguments outside [-1:1]. + return pselect(invalid_mask, pset1(std::numeric_limits::quiet_NaN()), p); +} + +// Computes elementwise atan(x) for x in [-1:1] with 2 ulp accuracy. +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan_reduced_float(const Packet& x) { + const Packet q0 = pset1(-0.3333314359188079833984375f); + const Packet q2 = pset1(0.19993579387664794921875f); + const Packet q4 = pset1(-0.14209578931331634521484375f); + const Packet q6 = pset1(0.1066047251224517822265625f); + const Packet q8 = pset1(-7.5408883392810821533203125e-2f); + const Packet q10 = pset1(4.3082617223262786865234375e-2f); + const Packet q12 = pset1(-1.62907354533672332763671875e-2f); + const Packet q14 = pset1(2.90188402868807315826416015625e-3f); + + // Approximate atan(x) by a polynomial of the form + // P(x) = x + x^3 * Q(x^2), + // where Q(x^2) is a 7th order polynomial in x^2. 
+ // We evaluate even and odd terms in x^2 in parallel + // to take advantage of instruction level parallelism + // and hardware with multiple FMA units. + const Packet x2 = pmul(x, x); + const Packet x4 = pmul(x2, x2); + Packet q_odd = pmadd(q14, x4, q10); + Packet q_even = pmadd(q12, x4, q8); + q_odd = pmadd(q_odd, x4, q6); + q_even = pmadd(q_even, x4, q4); + q_odd = pmadd(q_odd, x4, q2); + q_even = pmadd(q_even, x4, q0); + const Packet q = pmadd(q_odd, x2, q_even); + return pmadd(q, pmul(x, x2), x); +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan_float(const Packet& x_in) { + typedef typename unpacket_traits::type Scalar; + static_assert(std::is_same::value, "Scalar type must be float"); + + const Packet cst_one = pset1(1.0f); + constexpr float kPiOverTwo = static_cast(EIGEN_PI/2); + + // "Large": For |x| > 1, use atan(1/x) = sign(x)*pi/2 - atan(x). + // "Small": For |x| <= 1, approximate atan(x) directly by a polynomial + // calculated using Sollya. + const Packet neg_mask = pcmp_lt(x_in, pzero(x_in)); + const Packet large_mask = pcmp_lt(cst_one, pabs(x_in)); + const Packet large_shift = pselect(neg_mask, pset1(-kPiOverTwo), pset1(kPiOverTwo)); + const Packet x = pselect(large_mask, preciprocal(x_in), x_in); + const Packet p = patan_reduced_float(x); + + // Apply transformations according to the range reduction masks. + return pselect(large_mask, psub(large_shift, p), p); +} + +// Computes elementwise atan(x) for x in [-tan(pi/8):tan(pi/8)] +// with 2 ulp accuracy. +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet +patan_reduced_double(const Packet& x) { + const Packet q0 = + pset1(-0.33333333333330028569463365784031338989734649658203); + const Packet q2 = + pset1(0.199999999990664090177006073645316064357757568359375); + const Packet q4 = + pset1(-0.142857141937123677255527809393242932856082916259766); + const Packet q6 = + pset1(0.111111065991039953404495577160560060292482376098633); + const Packet q8 = + pset1(-9.0907812986129224452902519715280504897236824035645e-2); + const Packet q10 = + pset1(7.6900542950704739442180368769186316058039665222168e-2); + const Packet q12 = + pset1(-6.6410112986494976294871150912513257935643196105957e-2); + const Packet q14 = + pset1(5.6920144995467943094258345126945641823112964630127e-2); + const Packet q16 = + pset1(-4.3577020814990513608577771265117917209863662719727e-2); + const Packet q18 = + pset1(2.1244050233624342527427586446719942614436149597168e-2); + + // Approximate atan(x) on [0:tan(pi/8)] by a polynomial of the form + // P(x) = x + x^3 * Q(x^2), + // where Q(x^2) is a 9th order polynomial in x^2. + // We evaluate even and odd terms in x^2 in parallel + // to take advantage of instruction level parallelism + // and hardware with multiple FMA units. 
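The even/odd split this comment refers to (used by patan_reduced_float above and by the x4 chains that follow) is the transformation below, shown for a generic polynomial in t = x^2. The two Horner recurrences in t^2 carry independent dependency chains, so they can overlap on hardware with two FMA pipes; the names are illustrative.

#include <cstdio>

// Reference: one long Horner chain for q0 + q1*t + ... + q[n-1]*t^(n-1).
static double q_horner(double t, const double* q, int n) {
  double acc = q[n - 1];
  for (int i = n - 2; i >= 0; --i) acc = acc * t + q[i];
  return acc;
}

// Even/odd split: two independent Horner chains in t^2, recombined with one
// extra fma. Assumes n is even (as for the 8- and 10-coefficient kernels here).
static double q_horner_split(double t, const double* q, int n) {
  const double t2 = t * t;
  double even = q[n - 2], odd = q[n - 1];
  for (int i = n - 4; i >= 0; i -= 2) {
    even = even * t2 + q[i];
    odd = odd * t2 + q[i + 1];
  }
  return odd * t + even;
}

int main() {
  const double q[6] = {1, 2, 3, 4, 5, 6};
  std::printf("%.17g == %.17g\n", q_horner(0.25, q, 6), q_horner_split(0.25, q, 6));
}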
+ const Packet x2 = pmul(x, x); + const Packet x4 = pmul(x2, x2); + Packet q_odd = pmadd(q18, x4, q14); + Packet q_even = pmadd(q16, x4, q12); + q_odd = pmadd(q_odd, x4, q10); + q_even = pmadd(q_even, x4, q8); + q_odd = pmadd(q_odd, x4, q6); + q_even = pmadd(q_even, x4, q4); + q_odd = pmadd(q_odd, x4, q2); + q_even = pmadd(q_even, x4, q0); + const Packet p = pmadd(q_odd, x2, q_even); + return pmadd(p, pmul(x, x2), x); +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan_double(const Packet& x_in) { + typedef typename unpacket_traits::type Scalar; + static_assert(std::is_same::value, "Scalar type must be double"); + + const Packet cst_one = pset1(1.0); + constexpr double kPiOverTwo = static_cast(EIGEN_PI / 2); + const Packet cst_pi_over_two = pset1(kPiOverTwo); + constexpr double kPiOverFour = static_cast(EIGEN_PI / 4); + const Packet cst_pi_over_four = pset1(kPiOverFour); + const Packet cst_large = pset1(2.4142135623730950488016887); // tan(3*pi/8); + const Packet cst_medium = pset1(0.4142135623730950488016887); // tan(pi/8); + + const Packet neg_mask = pcmp_lt(x_in, pzero(x_in)); + Packet x = pabs(x_in); + + // Use the same range reduction strategy (to [0:tan(pi/8)]) as the + // Cephes library: + // "Large": For x >= tan(3*pi/8), use atan(1/x) = pi/2 - atan(x). + // "Medium": For x in [tan(pi/8) : tan(3*pi/8)), + // use atan(x) = pi/4 + atan((x-1)/(x+1)). + // "Small": For x < tan(pi/8), approximate atan(x) directly by a polynomial + // calculated using Sollya. + const Packet large_mask = pcmp_lt(cst_large, x); + x = pselect(large_mask, preciprocal(x), x); + const Packet medium_mask = pandnot(pcmp_lt(cst_medium, x), large_mask); + x = pselect(medium_mask, pdiv(psub(x, cst_one), padd(x, cst_one)), x); + + // Compute approximation of p ~= atan(x') where x' is the argument reduced to + // [0:tan(pi/8)]. + Packet p = patan_reduced_double(x); + + // Apply transformations according to the range reduction masks. + p = pselect(large_mask, psub(cst_pi_over_two, p), p); + p = pselect(medium_mask, padd(cst_pi_over_four, p), p); + return pselect(neg_mask, pnegate(p), p); +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pdiv_complex(const Packet& x, const Packet& y) { + typedef typename unpacket_traits::as_real RealPacket; + // In the following we annotate the code for the case where the inputs + // are a pair length-2 SIMD vectors representing a single pair of complex + // numbers x = a + i*b, y = c + i*d. + const RealPacket y_abs = pabs(y.v); // |c|, |d| + const RealPacket y_abs_flip = pcplxflip(Packet(y_abs)).v; // |d|, |c| + const RealPacket y_max = pmax(y_abs, y_abs_flip); // max(|c|, |d|), max(|c|, |d|) + const RealPacket y_scaled = pdiv(y.v, y_max); // c / max(|c|, |d|), d / max(|c|, |d|) + // Compute scaled denominator. + const RealPacket y_scaled_sq = pmul(y_scaled, y_scaled); // c'**2, d'**2 + const RealPacket denom = padd(y_scaled_sq, pcplxflip(Packet(y_scaled_sq)).v); + Packet result_scaled = pmul(x, pconj(Packet(y_scaled))); // a * c' + b * d', -a * d + b * c + // Divide elementwise by denom. + result_scaled = Packet(pdiv(result_scaled.v, denom)); + // Rescale result + return Packet(pdiv(result_scaled.v, y_max)); +} template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet psqrt_complex(const Packet& a) { typedef typename unpacket_traits::type Scalar; typedef typename Scalar::value_type RealScalar; @@ -832,8 +1047,8 @@ Packet psqrt_complex(const Packet& a) { // Step 4. 
Compute solution for inputs with negative real part: // [|eta0|, sign(y0)*rho0, |eta1|, sign(y1)*rho1] - const RealScalar neg_zero = RealScalar(numext::bit_cast(0x80000000u)); - const RealPacket cst_imag_sign_mask = pset1(Scalar(RealScalar(0.0), neg_zero)).v; + const RealPacket cst_imag_sign_mask = + pset1(Scalar(RealScalar(0.0), RealScalar(-0.0))).v; RealPacket imag_signs = pand(a.v, cst_imag_sign_mask); Packet negative_real_result; // Notice that rho is positive, so taking its absolute value is a noop. @@ -871,6 +1086,98 @@ Packet psqrt_complex(const Packet& a) { pselect(is_real_inf, real_inf_result,result)); } + +template +struct psign_impl< + Packet, + std::enable_if_t< + !NumTraits::type>::IsComplex && + !NumTraits::type>::IsInteger>> { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_one = pset1(Scalar(1)); + const Packet cst_minus_one = pset1(Scalar(-1)); + const Packet cst_zero = pzero(a); + + const Packet not_nan_mask = pcmp_eq(a, a); + const Packet positive_mask = pcmp_lt(cst_zero, a); + const Packet positive = pand(positive_mask, cst_one); + const Packet negative_mask = pcmp_lt(a, cst_zero); + const Packet negative = pand(negative_mask, cst_minus_one); + + return pselect(not_nan_mask, por(positive, negative), a); + } +}; + +template +struct psign_impl< + Packet, std::enable_if_t< + !NumTraits::type>::IsComplex && + NumTraits::type>::IsSigned && + NumTraits::type>::IsInteger>> { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_one = pset1(Scalar(1)); + const Packet cst_minus_one = pset1(Scalar(-1)); + const Packet cst_zero = pzero(a); + + const Packet positive_mask = pcmp_lt(cst_zero, a); + const Packet positive = pand(positive_mask, cst_one); + const Packet negative_mask = pcmp_lt(a, cst_zero); + const Packet negative = pand(negative_mask, cst_minus_one); + + return por(positive, negative); + } +}; + +template +struct psign_impl::type>::IsComplex && + !NumTraits::type>::IsSigned && + NumTraits::type>::IsInteger>> { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { + using Scalar = typename unpacket_traits::type; + const Packet cst_one = pset1(Scalar(1)); + const Packet cst_zero = pzero(a); + + const Packet zero_mask = pcmp_eq(cst_zero, a); + return pandnot(cst_one, zero_mask); + } +}; + +// \internal \returns the sign of a complex number z, defined as z / abs(z). +template +struct psign_impl::type>::IsComplex && + unpacket_traits::vectorizable>> { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { + typedef typename unpacket_traits::type Scalar; + typedef typename Scalar::value_type RealScalar; + typedef typename unpacket_traits::as_real RealPacket; + + // Step 1. Compute (for each element z = x + i*y in a) + // l = abs(z) = sqrt(x^2 + y^2). + // To avoid over- and underflow, we use the stable formula for each hypotenuse + // l = (zmin == 0 ?
zmax : zmax * sqrt(1 + (zmin/zmax)**2)), + // where zmax = max(|x|, |y|), zmin = min(|x|, |y|), + RealPacket a_abs = pabs(a.v); + RealPacket a_abs_flip = pcplxflip(Packet(a_abs)).v; + RealPacket a_max = pmax(a_abs, a_abs_flip); + RealPacket a_min = pmin(a_abs, a_abs_flip); + RealPacket a_min_zero_mask = pcmp_eq(a_min, pzero(a_min)); + RealPacket a_max_zero_mask = pcmp_eq(a_max, pzero(a_max)); + RealPacket r = pdiv(a_min, a_max); + const RealPacket cst_one = pset1(RealScalar(1)); + RealPacket l = pmul(a_max, psqrt(padd(cst_one, pmul(r, r)))); // [l0, l0, l1, l1] + // Set l to a_max if a_min is zero, since the roundtrip sqrt(a_max^2) may be + // lossy. + l = pselect(a_min_zero_mask, a_max, l); + // Step 2. Compute a / abs(a). + RealPacket sign_as_real = pandnot(pdiv(a.v, l), a_max_zero_mask); + Packet sign; + sign.v = sign_as_real; + return sign; + } +}; + // TODO(rmlarsen): The following set of utilities for double word arithmetic // should perhaps be refactored as a separate file, since it would be generally // useful for special function implementation etc. Writing the algorithms in @@ -1040,32 +1347,23 @@ void twoprod(const Packet& x_hi, const Packet& x_lo, fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo); } -// This function computes the reciprocal of a floating point number -// with extra precision and returns the result as a double word. +// This function implements the division of double word {x_hi, x_lo} +// by float y. This is Algorithm 15 from "Tight and rigorous error bounds +// for basic building blocks of double-word arithmetic", Joldes, Muller, & Popescu, +// 2017. https://hal.archives-ouvertes.fr/hal-01351529 template -void doubleword_reciprocal(const Packet& x, Packet& recip_hi, Packet& recip_lo) { - typedef typename unpacket_traits::type Scalar; - // 1. Approximate the reciprocal as the reciprocal of the high order element. - Packet approx_recip = prsqrt(x); - approx_recip = pmul(approx_recip, approx_recip); - - // 2. Run one step of Newton-Raphson iteration in double word arithmetic - // to get the bottom half. The NR iteration for reciprocal of 'a' is - // x_{i+1} = x_i * (2 - a * x_i) - - // -a*x_i - Packet t1_hi, t1_lo; - twoprod(pnegate(x), approx_recip, t1_hi, t1_lo); - // 2 - a*x_i - Packet t2_hi, t2_lo; - fast_twosum(pset1(Scalar(2)), t1_hi, t2_hi, t2_lo); - Packet t3_hi, t3_lo; - fast_twosum(t2_hi, padd(t2_lo, t1_lo), t3_hi, t3_lo); - // x_i * (2 - a * x_i) - twoprod(t3_hi, t3_lo, approx_recip, recip_hi, recip_lo); +void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, + Packet& z_hi, Packet& z_lo) { + const Packet t_hi = pdiv(x_hi, y); + Packet pi_hi, pi_lo; + twoprod(t_hi, y, pi_hi, pi_lo); + const Packet delta_hi = psub(x_hi, pi_hi); + const Packet delta_t = psub(delta_hi, pi_lo); + const Packet delta = padd(delta_t, x_lo); + const Packet t_lo = pdiv(delta, y); + fast_twosum(t_hi, t_lo, z_hi, z_lo); } - // This function computes log2(x) and returns the result as a double word. template struct accurate_log2 { @@ -1204,16 +1502,13 @@ struct accurate_log2 { const Packet cst_2_log2e_hi = pset1(2.88539008177792677); const Packet cst_2_log2e_lo = pset1(4.07660016854549667e-17); // c * (x - 1) - Packet num_hi, num_lo; - twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), num_hi, num_lo); - // TODO(rmlarsen): Investigate if using the division algorithm by - // Muller et al. is faster/more accurate.
- // 1 / (x + 1) - Packet denom_hi, denom_lo; - doubleword_reciprocal(padd(x, one), denom_hi, denom_lo); - // r = c * (x-1) / (x+1), + Packet t_hi, t_lo; + // t = c * (x-1) + twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), t_hi, t_lo); + // r = c * (x-1) / (x+1), Packet r_hi, r_lo; - twoprod(num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo); + doubleword_div_fp(t_hi, t_lo, padd(x, one), r_hi, r_lo); + // r2 = r * r Packet r2_hi, r2_lo; twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo); @@ -1443,39 +1738,40 @@ EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) { } // Generic implementation of pow(x,y). -template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED -Packet generic_pow(const Packet& x, const Packet& y) { +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Packet& x, const Packet& y) { typedef typename unpacket_traits::type Scalar; const Packet cst_pos_inf = pset1(NumTraits::infinity()); + const Packet cst_neg_inf = pset1(-NumTraits::infinity()); const Packet cst_zero = pset1(Scalar(0)); const Packet cst_one = pset1(Scalar(1)); const Packet cst_nan = pset1(NumTraits::quiet_NaN()); const Packet abs_x = pabs(x); // Predicates for sign and magnitude of x. - const Packet x_is_zero = pcmp_eq(x, cst_zero); - const Packet x_is_neg = pcmp_lt(x, cst_zero); + const Packet abs_x_is_zero = pcmp_eq(abs_x, cst_zero); + const Packet x_has_signbit = pcmp_eq(por(pand(x, cst_neg_inf), cst_pos_inf), cst_neg_inf); + const Packet x_is_neg = pandnot(x_has_signbit, abs_x_is_zero); + const Packet x_is_neg_zero = pand(x_has_signbit, abs_x_is_zero); const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf); - const Packet abs_x_is_one = pcmp_eq(abs_x, cst_one); + const Packet abs_x_is_one = pcmp_eq(abs_x, cst_one); const Packet abs_x_is_gt_one = pcmp_lt(cst_one, abs_x); const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one); - const Packet x_is_one = pandnot(abs_x_is_one, x_is_neg); - const Packet x_is_neg_one = pand(abs_x_is_one, x_is_neg); + const Packet x_is_one = pandnot(abs_x_is_one, x_is_neg); + const Packet x_is_neg_one = pand(abs_x_is_one, x_is_neg); const Packet x_is_nan = pandnot(ptrue(x), pcmp_eq(x, x)); // Predicates for sign and magnitude of y. + const Packet abs_y = pabs(y); const Packet y_is_one = pcmp_eq(y, cst_one); - const Packet y_is_zero = pcmp_eq(y, cst_zero); + const Packet abs_y_is_zero = pcmp_eq(abs_y, cst_zero); const Packet y_is_neg = pcmp_lt(y, cst_zero); - const Packet y_is_pos = pandnot(ptrue(y), por(y_is_zero, y_is_neg)); + const Packet y_is_pos = pandnot(ptrue(y), por(abs_y_is_zero, y_is_neg)); const Packet y_is_nan = pandnot(ptrue(y), pcmp_eq(y, y)); - const Packet abs_y_is_inf = pcmp_eq(pabs(y), cst_pos_inf); + const Packet abs_y_is_inf = pcmp_eq(abs_y, cst_pos_inf); EIGEN_CONSTEXPR Scalar huge_exponent = - (NumTraits::max_exponent() * Scalar(EIGEN_LN2)) / - NumTraits::epsilon(); + (NumTraits::max_exponent() * Scalar(EIGEN_LN2)) / NumTraits::epsilon(); const Packet abs_y_is_huge = pcmp_le(pset1(huge_exponent), pabs(y)); // Predicates for whether y is integer and/or even. 
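The predicate network in the next hunk encodes the IEC 60559 pow special cases, now including the negative-infinity results that the old code collapsed to +inf. For orientation, a few of the corners it must reproduce, cross-checked against std::pow; this scalar check is added here purely for illustration.

#include <cmath>
#include <cstdio>

int main() {
  const double cases[][2] = {
      {-0.0, -3.0},       // -> -inf   (negative zero base, negative odd integer y)
      {-0.0, 3.0},        // -> -0
      {-1.0, INFINITY},   // -> 1      (|x| == 1, infinite y)
      {0.5, -INFINITY},   // -> inf    (|x| < 1, y -> -inf)
      {-2.0, 3.0},        // -> -8     (negative base, odd integer y)
      {-2.0, 0.5},        // -> nan    (negative base, non-integer y)
  };
  for (const auto& c : cases)
    std::printf("pow(%g, %g) = %g\n", c[0], c[1], std::pow(c[0], c[1]));
}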
@@ -1484,39 +1780,31 @@ Packet generic_pow(const Packet& x, const Packet& y) { const Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2); // Predicates encoding special cases for the value of pow(x,y) - const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf), - y_is_int), - abs_y_is_inf); - const Packet pow_is_one = por(por(x_is_one, y_is_zero), - pand(x_is_neg_one, - por(abs_y_is_inf, pandnot(y_is_even, invalid_negative_x)))); + const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf), y_is_int), abs_y_is_inf); const Packet pow_is_nan = por(invalid_negative_x, por(x_is_nan, y_is_nan)); - const Packet pow_is_zero = por(por(por(pand(x_is_zero, y_is_pos), - pand(abs_x_is_inf, y_is_neg)), - pand(pand(abs_x_is_lt_one, abs_y_is_huge), - y_is_pos)), - pand(pand(abs_x_is_gt_one, abs_y_is_huge), - y_is_neg)); - const Packet pow_is_inf = por(por(por(pand(x_is_zero, y_is_neg), - pand(abs_x_is_inf, y_is_pos)), - pand(pand(abs_x_is_lt_one, abs_y_is_huge), - y_is_neg)), - pand(pand(abs_x_is_gt_one, abs_y_is_huge), - y_is_pos)); + const Packet pow_is_one = + por(por(x_is_one, abs_y_is_zero), pand(x_is_neg_one, por(abs_y_is_inf, pandnot(y_is_even, invalid_negative_x)))); + const Packet pow_is_zero = por(por(por(pand(abs_x_is_zero, y_is_pos), pand(abs_x_is_inf, y_is_neg)), + pand(pand(abs_x_is_lt_one, abs_y_is_huge), y_is_pos)), + pand(pand(abs_x_is_gt_one, abs_y_is_huge), y_is_neg)); + const Packet pow_is_inf = por(por(por(pand(abs_x_is_zero, y_is_neg), pand(abs_x_is_inf, y_is_pos)), + pand(pand(abs_x_is_lt_one, abs_y_is_huge), y_is_neg)), + pand(pand(abs_x_is_gt_one, abs_y_is_huge), y_is_pos)); + const Packet inf_val = + pselect(pandnot(pand(por(pand(abs_x_is_inf, x_is_neg), pand(x_is_neg_zero, y_is_neg)), y_is_int), y_is_even), + cst_neg_inf, cst_pos_inf); // General computation of pow(x,y) for positive x or negative x and integer y. const Packet negate_pow_abs = pandnot(x_is_neg, y_is_even); const Packet pow_abs = generic_pow_impl(abs_x, y); - return pselect(y_is_one, x, - pselect(pow_is_one, cst_one, - pselect(pow_is_nan, cst_nan, - pselect(pow_is_inf, cst_pos_inf, - pselect(pow_is_zero, cst_zero, - pselect(negate_pow_abs, pnegate(pow_abs), pow_abs)))))); + return pselect( + y_is_one, x, + pselect(pow_is_one, cst_one, + pselect(pow_is_nan, cst_nan, + pselect(pow_is_inf, inf_val, + pselect(pow_is_zero, cst_zero, pselect(negate_pow_abs, pnegate(pow_abs), pow_abs)))))); } - - /* polevl (modified for Eigen) * * Evaluate polynomial @@ -1643,6 +1931,267 @@ struct pchebevl { } }; +namespace unary_pow { +template ::IsInteger> +struct is_odd { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarExponent run(const ScalarExponent& x) { + ScalarExponent xdiv2 = x / ScalarExponent(2); + ScalarExponent floorxdiv2 = numext::floor(xdiv2); + return xdiv2 != floorxdiv2; + } +}; +template +struct is_odd { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarExponent run(const ScalarExponent& x) { + return x % ScalarExponent(2); + } +}; + +template ::type>::IsInteger> +struct do_div { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const ScalarExponent& exponent) { + typedef typename unpacket_traits::type Scalar; + const Packet cst_pos_one = pset1(Scalar(1)); + return exponent < 0 ? 
pdiv(cst_pos_one, x) : x; + } +}; + +template +struct do_div { + // pdiv not defined, nor necessary for integer base types + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const ScalarExponent& exponent) { + EIGEN_UNUSED_VARIABLE(exponent); + return x; + } +}; + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet int_pow(const Packet& x, const ScalarExponent& exponent) { + typedef typename unpacket_traits::type Scalar; + const Packet cst_pos_one = pset1(Scalar(1)); + if (exponent == 0) return cst_pos_one; + Packet result = x; + Packet y = cst_pos_one; + ScalarExponent m = numext::abs(exponent); + while (m > 1) { + bool odd = is_odd::run(m); + if (odd) y = pmul(y, result); + result = pmul(result, result); + m = numext::floor(m / ScalarExponent(2)); + } + result = pmul(y, result); + result = do_div::run(result, exponent); + return result; +} + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet gen_pow(const Packet& x, + const typename unpacket_traits::type& exponent) { + const Packet exponent_packet = pset1(exponent); + return generic_pow_impl(x, exponent_packet); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_nonint_int_errors(const Packet& x, const Packet& powx, + const ScalarExponent& exponent) { + typedef typename unpacket_traits::type Scalar; + + // non-integer base, integer exponent case + + const bool exponent_is_odd = is_odd::run(exponent); + const bool exponent_is_neg = exponent < 0; + + const Packet exp_is_odd = exponent_is_odd ? ptrue(x) : pzero(x); + const Packet exp_is_neg = exponent_is_neg ? ptrue(x) : pzero(x); + + const Scalar pos_zero = Scalar(0); + const Scalar neg_zero = -Scalar(0); + const Scalar pos_one = Scalar(1); + const Scalar pos_inf = NumTraits::infinity(); + const Scalar neg_inf = -NumTraits::infinity(); + + const Packet cst_pos_zero = pset1(pos_zero); + const Packet cst_neg_zero = pset1(neg_zero); + const Packet cst_pos_one = pset1(pos_one); + const Packet cst_pos_inf = pset1(pos_inf); + const Packet cst_neg_inf = pset1(neg_inf); + + const Packet abs_x = pabs(x); + const Packet abs_x_is_zero = pcmp_eq(abs_x, cst_pos_zero); + const Packet abs_x_is_one = pcmp_eq(abs_x, cst_pos_one); + const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf); + + const Packet x_has_signbit = pcmp_eq(por(pand(x, cst_neg_inf), cst_pos_inf), cst_neg_inf); + const Packet x_is_neg = pandnot(x_has_signbit, abs_x_is_zero); + const Packet x_is_neg_zero = pand(x_has_signbit, abs_x_is_zero); + + if (exponent == 0) { + return cst_pos_one; + } + + Packet pow_is_pos_inf = pand(pandnot(abs_x_is_zero, x_is_neg_zero), pand(exp_is_odd, exp_is_neg)); + pow_is_pos_inf = por(pow_is_pos_inf, pand(abs_x_is_zero, pandnot(exp_is_neg, exp_is_odd))); + pow_is_pos_inf = por(pow_is_pos_inf, pand(pand(abs_x_is_inf, x_is_neg), pandnot(pnot(exp_is_neg), exp_is_odd))); + pow_is_pos_inf = por(pow_is_pos_inf, pandnot(pandnot(abs_x_is_inf, x_is_neg), exp_is_neg)); + + Packet pow_is_neg_inf = pand(x_is_neg_zero, pand(exp_is_neg, exp_is_odd)); + pow_is_neg_inf = por(pow_is_neg_inf, pand(pand(abs_x_is_inf, x_is_neg), pandnot(exp_is_odd, exp_is_neg))); + + Packet pow_is_pos_zero = pandnot(abs_x_is_zero, exp_is_neg); + pow_is_pos_zero = por(pow_is_pos_zero, pand(pand(abs_x_is_inf, x_is_neg), pandnot(exp_is_neg, exp_is_odd))); + pow_is_pos_zero = por(pow_is_pos_zero, pand(pandnot(abs_x_is_inf, x_is_neg), exp_is_neg)); + + Packet pow_is_neg_zero = pand(x_is_neg_zero, pandnot(exp_is_odd, exp_is_neg)); + pow_is_neg_zero = por(pow_is_neg_zero, 
pand(pand(abs_x_is_inf, x_is_neg), pand(exp_is_odd, exp_is_neg))); + + Packet result = pselect(pow_is_neg_inf, cst_neg_inf, powx); + result = pselect(pow_is_neg_zero, cst_neg_zero, result); + result = pselect(pow_is_pos_zero, cst_pos_zero, result); + result = pselect(pow_is_pos_inf, cst_pos_inf, result); + result = pselect(pandnot(abs_x_is_one, x_is_neg), cst_pos_one, result); + return result; +} + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_nonint_nonint_errors(const Packet& x, const Packet& powx, + const ScalarExponent& exponent) { + typedef typename unpacket_traits::type Scalar; + + // non-integer base and exponent case + + const bool exponent_is_fin = (numext::isfinite)(exponent); + const bool exponent_is_nan = (numext::isnan)(exponent); + const bool exponent_is_neg = exponent < 0; + const bool exponent_is_inf = !exponent_is_fin && !exponent_is_nan; + + const Packet exp_is_neg = exponent_is_neg ? ptrue(x) : pzero(x); + const Packet exp_is_inf = exponent_is_inf ? ptrue(x) : pzero(x); + + const Scalar pos_zero = Scalar(0); + const Scalar pos_one = Scalar(1); + const Scalar pos_inf = NumTraits::infinity(); + const Scalar neg_inf = -NumTraits::infinity(); + const Scalar nan = NumTraits::quiet_NaN(); + + const Packet cst_pos_zero = pset1(pos_zero); + const Packet cst_pos_one = pset1(pos_one); + const Packet cst_pos_inf = pset1(pos_inf); + const Packet cst_neg_inf = pset1(neg_inf); + const Packet cst_nan = pset1(nan); + + const Packet abs_x = pabs(x); + const Packet abs_x_is_zero = pcmp_eq(abs_x, cst_pos_zero); + const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_pos_one); + const Packet abs_x_is_gt_one = pcmp_lt(cst_pos_one, abs_x); + const Packet abs_x_is_one = pcmp_eq(abs_x, cst_pos_one); + const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf); + + const Packet x_has_signbit = pcmp_eq(por(pand(x, cst_neg_inf), cst_pos_inf), cst_neg_inf); + const Packet x_is_neg = pandnot(x_has_signbit, abs_x_is_zero); + + if (exponent_is_nan) { + return pselect(pandnot(abs_x_is_one, x_is_neg), cst_pos_one, cst_nan); + } + + Packet pow_is_pos_zero = pandnot(abs_x_is_zero, exp_is_neg); + pow_is_pos_zero = por(pow_is_pos_zero, pand(abs_x_is_gt_one, pand(exp_is_inf, exp_is_neg))); + pow_is_pos_zero = por(pow_is_pos_zero, pand(abs_x_is_lt_one, pandnot(exp_is_inf, exp_is_neg))); + pow_is_pos_zero = por(pow_is_pos_zero, pand(abs_x_is_inf, exp_is_neg)); + + const Packet pow_is_pos_one = pand(abs_x_is_one, exp_is_inf); + + Packet pow_is_pos_inf = pand(abs_x_is_zero, exp_is_neg); + pow_is_pos_inf = por(pow_is_pos_inf, pand(abs_x_is_lt_one, pand(exp_is_inf, exp_is_neg))); + pow_is_pos_inf = por(pow_is_pos_inf, pand(abs_x_is_gt_one, pandnot(exp_is_inf, exp_is_neg))); + pow_is_pos_inf = por(pow_is_pos_inf, pandnot(abs_x_is_inf, exp_is_neg)); + + const Packet pow_is_nan = pandnot(pandnot(x_is_neg, abs_x_is_inf), exp_is_inf); + + Packet result = pselect(pow_is_pos_inf, cst_pos_inf, powx); + result = pselect(pow_is_pos_one, cst_pos_one, result); + result = pselect(pow_is_pos_zero, cst_pos_zero, result); + result = pselect(pow_is_nan, cst_nan, result); + result = pselect(pandnot(abs_x_is_one, x_is_neg), cst_pos_one, result); + return result; +} + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_int_int(const Packet& x, const ScalarExponent& exponent) { + typedef typename unpacket_traits::type Scalar; + + // integer base, integer exponent case + + // This routine handles negative and very large positive exponents + // Signed integer overflow and divide by zero is undefined 
behavior + // Unsigned integers do not overflow + + const bool exponent_is_odd = unary_pow::is_odd::run(exponent); + + const Scalar zero = Scalar(0); + const Scalar pos_one = Scalar(1); + + const Packet cst_zero = pset1(zero); + const Packet cst_pos_one = pset1(pos_one); + + const Packet abs_x = pabs(x); + + const Packet pow_is_zero = exponent < 0 ? pcmp_lt(cst_pos_one, abs_x) : pzero(x); + const Packet pow_is_one = pcmp_eq(cst_pos_one, abs_x); + const Packet pow_is_neg = exponent_is_odd ? pcmp_lt(x, cst_zero) : pzero(x); + + Packet result = pselect(pow_is_zero, cst_zero, x); + result = pselect(pandnot(pow_is_one, pow_is_neg), cst_pos_one, result); + result = pselect(pand(pow_is_one, pow_is_neg), pnegate(cst_pos_one), result); + return result; +} +} // end namespace unary_pow + +template ::type>::IsInteger, + bool ExponentIsIntegerType = NumTraits::IsInteger> +struct unary_pow_impl; + +template +struct unary_pow_impl { + typedef typename unpacket_traits::type Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const ScalarExponent& exponent) { + const bool exponent_is_integer = (numext::isfinite)(exponent) && numext::round(exponent) == exponent; + if (exponent_is_integer) { + Packet result = unary_pow::int_pow(x, exponent); + result = unary_pow::handle_nonint_int_errors(x, result, exponent); + return result; + } else { + Packet result = unary_pow::gen_pow(x, exponent); + result = unary_pow::handle_nonint_nonint_errors(x, result, exponent); + return result; + } + } +}; + +template +struct unary_pow_impl { + typedef typename unpacket_traits::type Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const ScalarExponent& exponent) { + Packet result = unary_pow::int_pow(x, exponent); + result = unary_pow::handle_nonint_int_errors(x, result, exponent); + return result; + } +}; + +template +struct unary_pow_impl { + typedef typename unpacket_traits::type Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const ScalarExponent& exponent) { + if (exponent < 0 || exponent > NumTraits::digits()) { + return unary_pow::handle_int_int(x, exponent); + } + else { + return unary_pow::int_pow(x, exponent); + } + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index 177a04e..179c55c 100644 --- a/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/libs/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -10,6 +10,8 @@ #ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H #define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -42,25 +44,21 @@ Packet pldexp_generic(const Packet& a, const Packet& exponent); /** \internal \returns log(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog_float(const Packet _x); /** \internal \returns log2(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog2_float(const Packet _x); /** \internal \returns log(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog_double(const Packet _x); /** \internal \returns log2(x) for single precision float */ template
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet plog2_double(const Packet _x); /** \internal \returns log(1 + x) */ @@ -74,33 +72,53 @@ Packet generic_expm1(const Packet& x); /** \internal \returns exp(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet pexp_float(const Packet _x); /** \internal \returns exp(x) for double precision real numbers */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet pexp_double(const Packet _x); /** \internal \returns sin(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet psin_float(const Packet& x); /** \internal \returns cos(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet pcos_float(const Packet& x); +/** \internal \returns asin(x) for single precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pasin_float(const Packet& x); + +/** \internal \returns acos(x) for single precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pacos_float(const Packet& x); + +/** \internal \returns atan(x) for single precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan_float(const Packet& x); + +/** \internal \returns atan(x) for double precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan_double(const Packet& x); + /** \internal \returns sqrt(x) for complex types */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -EIGEN_UNUSED Packet psqrt_complex(const Packet& a); +/** \internal \returns x / y for complex types */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pdiv_complex(const Packet& x, const Packet& y); + template struct ppolevl; diff --git a/libs/eigen/Eigen/src/Core/arch/Default/Half.h b/libs/eigen/Eigen/src/Core/arch/Default/Half.h index 9f8e8cc..75d6228 100644 --- a/libs/eigen/Eigen/src/Core/arch/Default/Half.h +++ b/libs/eigen/Eigen/src/Core/arch/Default/Half.h @@ -36,7 +36,7 @@ #ifndef EIGEN_HALF_H #define EIGEN_HALF_H -#include +#include "../../InternalHeaderCheck.h" #if defined(EIGEN_HAS_GPU_FP16) || defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) // When compiling with GPU support, the "__half_raw" base class as well as @@ -202,57 +202,113 @@ struct half : public half_impl::half_base { #endif }; -} // end namespace Eigen +// TODO(majnemer): Get rid of this once we can rely on C++17 inline variables to +// solve the ODR issue. +namespace half_impl { +template +struct numeric_limits_half_impl { + static EIGEN_CONSTEXPR const bool is_specialized = true; + static EIGEN_CONSTEXPR const bool is_signed = true; + static EIGEN_CONSTEXPR const bool is_integer = false; + static EIGEN_CONSTEXPR const bool is_exact = false; + static EIGEN_CONSTEXPR const bool has_infinity = true; + static EIGEN_CONSTEXPR const bool has_quiet_NaN = true; + static EIGEN_CONSTEXPR const bool has_signaling_NaN = true; + static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present; + static EIGEN_CONSTEXPR const bool has_denorm_loss = false; + static EIGEN_CONSTEXPR const std::float_round_style round_style = std::round_to_nearest; + static EIGEN_CONSTEXPR const bool is_iec559 = true; + // The C++ standard defines this as "true if the set of values representable + // by the type is finite." Half has finite precision.
+ static EIGEN_CONSTEXPR const bool is_bounded = true; + static EIGEN_CONSTEXPR const bool is_modulo = false; + static EIGEN_CONSTEXPR const int digits = 11; + static EIGEN_CONSTEXPR const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static EIGEN_CONSTEXPR const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static EIGEN_CONSTEXPR const int radix = std::numeric_limits::radix; + static EIGEN_CONSTEXPR const int min_exponent = -13; + static EIGEN_CONSTEXPR const int min_exponent10 = -4; + static EIGEN_CONSTEXPR const int max_exponent = 16; + static EIGEN_CONSTEXPR const int max_exponent10 = 4; + static EIGEN_CONSTEXPR const bool traps = std::numeric_limits::traps; + // IEEE754: "The implementer shall choose how tininess is detected, but shall + // detect tininess in the same way for all operations in radix two" + static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits::tinyness_before; -namespace std { -template<> -struct numeric_limits { - static const bool is_specialized = true; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const bool has_infinity = true; - static const bool has_quiet_NaN = true; - static const bool has_signaling_NaN = true; - static const float_denorm_style has_denorm = denorm_present; - static const bool has_denorm_loss = false; - static const std::float_round_style round_style = std::round_to_nearest; - static const bool is_iec559 = false; - static const bool is_bounded = false; - static const bool is_modulo = false; - static const int digits = 11; - static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html - static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html - static const int radix = 2; - static const int min_exponent = -13; - static const int min_exponent10 = -4; - static const int max_exponent = 16; - static const int max_exponent10 = 4; - static const bool traps = true; - static const bool tinyness_before = false; - - static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } - static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } - static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } - static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } - static Eigen::half round_error() { return Eigen::half(0.5); } - static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } - static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } - static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7d00); } - static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } + static EIGEN_CONSTEXPR Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x0400); } + static EIGEN_CONSTEXPR Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static EIGEN_CONSTEXPR Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static EIGEN_CONSTEXPR Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x1400); } + static EIGEN_CONSTEXPR Eigen::half round_error() { return 
Eigen::half_impl::raw_uint16_to_half(0x3800); } + static EIGEN_CONSTEXPR Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static EIGEN_CONSTEXPR Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static EIGEN_CONSTEXPR Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7d00); } + static EIGEN_CONSTEXPR Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x0001); } }; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::is_specialized; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::is_signed; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::is_integer; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::is_exact; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::has_infinity; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::has_quiet_NaN; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::has_signaling_NaN; +template +EIGEN_CONSTEXPR const std::float_denorm_style numeric_limits_half_impl::has_denorm; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::has_denorm_loss; +template +EIGEN_CONSTEXPR const std::float_round_style numeric_limits_half_impl::round_style; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::is_iec559; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::is_bounded; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::is_modulo; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::digits; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::digits10; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::max_digits10; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::radix; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::min_exponent; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::min_exponent10; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::max_exponent; +template +EIGEN_CONSTEXPR const int numeric_limits_half_impl::max_exponent10; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::traps; +template +EIGEN_CONSTEXPR const bool numeric_limits_half_impl::tinyness_before; +} // end namespace half_impl +} // end namespace Eigen + +namespace std { // If std::numeric_limits is specialized, should also specialize // std::numeric_limits, std::numeric_limits, and // std::numeric_limits // https://stackoverflow.com/a/16519653/ template<> -struct numeric_limits : numeric_limits {}; +class numeric_limits : public Eigen::half_impl::numeric_limits_half_impl<> {}; template<> -struct numeric_limits : numeric_limits {}; +class numeric_limits : public numeric_limits {}; template<> -struct numeric_limits : numeric_limits {}; -} // end namespace std +class numeric_limits : public numeric_limits {}; +template<> +class numeric_limits : public numeric_limits {}; +} // end namespace std namespace Eigen { @@ -261,7 +317,7 @@ namespace half_impl { #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && \ EIGEN_CUDA_ARCH >= 530) || \ (defined(EIGEN_HAS_HIP_FP16) && defined(HIP_DEVICE_COMPILE)) -// Note: We deliberatly do *not* define this to 1 even if we have Arm's native +// Note: We deliberately do *not* define this to 1 even if we have Arm's native // fp16 type since GPU halfs are rather different from native CPU halfs. 
// TODO: Rename to something like EIGEN_HAS_NATIVE_GPU_FP16 #define EIGEN_HAS_NATIVE_FP16 @@ -334,7 +390,7 @@ EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) { } #endif -#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) +#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE) EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) { return half(vaddh_f16(a.x, b.x)); } @@ -534,7 +590,12 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) { #elif defined(EIGEN_HAS_FP16_C) __half_raw h; - h.x = _cvtss_sh(ff, 0); + #if EIGEN_COMP_MSVC + // MSVC does not have scalar instructions. + h.x =_mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(ff), 0), 0); + #else + h.x = _cvtss_sh(ff, 0); + #endif return h; #elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) @@ -595,7 +656,12 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) { (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE)) return __half2float(h); #elif defined(EIGEN_HAS_FP16_C) - return _cvtsh_ss(h.x); + #if EIGEN_COMP_MSVC + // MSVC does not have scalar instructions. + return _mm_cvtss_f32(_mm_cvtph_ps(_mm_set1_epi16(h.x))); + #else + return _cvtsh_ss(h.x); + #endif #elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) return static_cast(h.x); #else @@ -692,6 +758,9 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atan2(const half& a, const half& b) { + return half(::atan2f(float(a), float(b))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) { return half(::sinf(float(a))); } diff --git a/libs/eigen/Eigen/src/Core/arch/Default/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/Default/TypeCasting.h index fb8183b..dc779a7 100644 --- a/libs/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/Default/TypeCasting.h @@ -11,13 +11,14 @@ #ifndef EIGEN_GENERIC_TYPE_CASTING_H #define EIGEN_GENERIC_TYPE_CASTING_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { template<> struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef Eigen::half result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const { #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \ @@ -36,7 +37,6 @@ struct functor_traits > template<> struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef Eigen::half result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const { #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \ @@ -55,7 +55,6 @@ struct functor_traits > template<> struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef float result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const { #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \ @@ -74,7 +73,6 @@ struct functor_traits > template<> struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef Eigen::bfloat16 result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const float& a) const { return Eigen::bfloat16(a); @@ -88,7 +86,6 @@ struct functor_traits > template<> struct scalar_cast_op { - 
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef Eigen::bfloat16 result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const int& a) const { return Eigen::bfloat16(static_cast(a)); @@ -102,7 +99,6 @@ struct functor_traits > template<> struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef float result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::bfloat16& a) const { return static_cast(a); diff --git a/libs/eigen/Eigen/src/Core/arch/CUDA/Complex.h b/libs/eigen/Eigen/src/Core/arch/GPU/Complex.h similarity index 95% rename from libs/eigen/Eigen/src/Core/arch/CUDA/Complex.h rename to libs/eigen/Eigen/src/Core/arch/GPU/Complex.h index deb4c86..c2b4c38 100644 --- a/libs/eigen/Eigen/src/Core/arch/CUDA/Complex.h +++ b/libs/eigen/Eigen/src/Core/arch/GPU/Complex.h @@ -8,18 +8,29 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_COMPLEX_CUDA_H -#define EIGEN_COMPLEX_CUDA_H +#ifndef EIGEN_COMPLEX_GPU_H +#define EIGEN_COMPLEX_GPU_H -// clang-format off // Many std::complex methods such as operator+, operator-, operator* and // operator/ are not constexpr. Due to this, GCC and older versions of clang do // not treat them as device functions and thus Eigen functors making use of // these operators fail to compile. Here, we manually specialize these // operators and functors for complex types when building for CUDA to enable // their use on-device. +// +// NOTES: +// - Compound assignment operators +=,-=,*=,/=(Scalar) will not work on device, +// since they are already specialized in the standard. Using them will result +// in silent kernel failures. +// - Compiling with MSVC and using +=,-=,*=,/=(std::complex) will lead +// to duplicate definition errors, since these are already specialized in +// Visual Studio's header (contrary to the standard). This is +// preferable to removing such definitions, which will lead to silent kernel +// failures. +// - Compiling with ICC requires defining _USE_COMPLEX_SPECIALIZATION_ prior +// to the first inclusion of . -#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE) +#if defined(EIGEN_GPUCC) && defined(EIGEN_GPU_COMPILE_PHASE) // ICC already specializes std::complex and std::complex // operators, preventing us from making them device functions here. @@ -43,6 +54,8 @@ using Eigen::complex_operator_detail::operator==; \ using Eigen::complex_operator_detail::operator!=; +#include "../../InternalHeaderCheck.h" + namespace Eigen { // Specialized std::complex overloads. 
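The specialization pattern described in the notes above boils down to re-expressing complex arithmetic through the real/imag accessors, wrapped in functions explicitly marked for host and device. A minimal sketch of that idea follows; it is not the actual Eigen overload set, `EX_HOST_DEVICE` is an illustrative macro, and device-side use of the C++14 `constexpr` accessors assumes nvcc's `--expt-relaxed-constexpr` or clang:

```cpp
#include <complex>

#if defined(__CUDACC__) || defined(__HIPCC__)
#define EX_HOST_DEVICE __host__ __device__
#else
#define EX_HOST_DEVICE
#endif

// Device-safe complex multiply: avoids std::complex::operator*, which is not
// constexpr and therefore not treated as a device function by GCC.
template <typename T>
EX_HOST_DEVICE std::complex<T> complex_multiply(const std::complex<T>& a,
                                                const std::complex<T>& b) {
  const T a_re = a.real(), a_im = a.imag();  // constexpr accessors (C++14)
  const T b_re = b.real(), b_im = b.imag();
  return std::complex<T>(a_re * b_re - a_im * b_im,
                         a_re * b_im + a_im * b_re);
}
```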
@@ -253,6 +266,6 @@ EIGEN_USING_STD_COMPLEX_OPERATORS #endif // !(EIGEN_COMP_ICC && _USE_COMPLEX_SPECIALIZATION_) -#endif // EIGEN_CUDACC && EIGEN_GPU_COMPILE_PHASE +#endif // EIGEN_GPUCC && EIGEN_GPU_COMPILE_PHASE -#endif // EIGEN_COMPLEX_CUDA_H +#endif // EIGEN_COMPLEX_GPU_H diff --git a/libs/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h index d2b3a25..ad61e95 100644 --- a/libs/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h @@ -10,6 +10,8 @@ #ifndef EIGEN_MATH_FUNCTIONS_GPU_H #define EIGEN_MATH_FUNCTIONS_GPU_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/arch/GPU/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/GPU/PacketMath.h index 689110d..e2bcf48 100644 --- a/libs/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/GPU/PacketMath.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PACKET_MATH_GPU_H #define EIGEN_PACKET_MATH_GPU_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -121,7 +123,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1(const do // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation // of the functions, while the latter can only deal with one of them. #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) -namespace { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a, const float& b) { @@ -175,12 +176,21 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float lt_mask(const float& a, const float& b) { return __int_as_float(a < b ? 0xffffffffu : 0u); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a, const double& b) { return __longlong_as_double(a < b ? 0xffffffffffffffffull : 0ull); } -} // namespace +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float le_mask(const float& a, + const float& b) { + return __int_as_float(a <= b ? 0xffffffffu : 0u); +} + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double le_mask(const double& a, + const double& b) { + return __longlong_as_double(a <= b ? 
0xffffffffffffffffull : 0ull); +} template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand(const float4& a, @@ -243,6 +253,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_lt(const float4& a, lt_mask(a.w, b.w)); } template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_le(const float4& a, + const float4& b) { + return make_float4(le_mask(a.x, b.x), le_mask(a.y, b.y), le_mask(a.z, b.z), + le_mask(a.w, b.w)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_eq(const double2& a, const double2& b) { return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y)); @@ -252,6 +268,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_lt(const double2& a, const double2& b) { return make_double2(lt_mask(a.x, b.x), lt_mask(a.y, b.y)); } +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pcmp_le(const double2& a, const double2& b) { + return make_double2(le_mask(a.x, b.x), le_mask(a.y, b.y)); +} #endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset(const float& a) { @@ -493,9 +514,10 @@ ptranspose(PacketBlock& kernel) { #endif // defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU) -// Packet4h2 must be defined in the macro without EIGEN_CUDA_ARCH, meaning -// its corresponding packet_traits must be visible on host. -#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) +// Half-packet functions are not available on the host for CUDA 9.0-9.2, only +// on device. There is no benefit to using them on the host anyways, since they are +// emulated. +#if (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE) typedef ulonglong2 Packet4h2; template<> struct unpacket_traits { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; }; @@ -526,42 +548,9 @@ template<> struct packet_traits : default_packet_traits }; }; -namespace { -// This is equivalent to make_half2, which is undocumented and doesn't seem to always exist. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 combine_half(const __half& a, const __half& b) { -#if defined(EIGEN_GPU_COMPILE_PHASE) - return __halves2half2(a, b); -#else - // Round-about way since __halves2half2 is a __device__ function. - return __floats2half2_rn(__half2float(a), __half2float(b)); -#endif -} - -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE __half get_half2_low(const half2& a) { -#if defined(EIGEN_GPU_COMPILE_PHASE) - return __low2half(a); -#else - return __float2half(__low2float(a)); -#endif -} - -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE __half get_half2_high(const half2& a) { -#if defined(EIGEN_GPU_COMPILE_PHASE) - return __high2half(a); -#else - return __float2half(__high2float(a)); -#endif -} -} // namespace - template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1(const Eigen::half& from) { -#if defined(EIGEN_GPU_COMPILE_PHASE) return __half2half2(from); -#else - const float f = __half2float(from); - return __floats2half2_rn(f, f); -#endif } template <> @@ -576,8 +565,6 @@ pset1(const Eigen::half& from) { return r; } -// We now need this visible on both host and device. 
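The `le_mask`/`pcmp_le` additions above follow the standard SIMD comparison convention: a true lane is an all-ones bit pattern reinterpreted as a float, a false lane is all zeros, so the result can drive bitwise selection (`pselect`, `pand`, and friends). A scalar stand-in in plain C++, where `bits_to_float` plays the role of the CUDA `__int_as_float` intrinsic:

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

static float bits_to_float(uint32_t bits) {
  float f;
  std::memcpy(&f, &bits, sizeof(f));  // well-defined bit cast
  return f;
}

static float le_mask(float a, float b) {
  return bits_to_float(a <= b ? 0xffffffffu : 0u);
}

// Mask-driven select: all-ones lanes pick x, all-zero lanes pick y.
static float select_by_mask(float mask, float x, float y) {
  uint32_t m, xb, yb;
  std::memcpy(&m,  &mask, sizeof(m));
  std::memcpy(&xb, &x,    sizeof(xb));
  std::memcpy(&yb, &y,    sizeof(yb));
  return bits_to_float((m & xb) | (~m & yb));
}

int main() {
  const float m = le_mask(1.0f, 2.0f);            // all-ones (a NaN pattern)
  printf("%g\n", select_by_mask(m, 3.0f, 4.0f));  // prints 3
}
```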
-// #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) namespace { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload(const Eigen::half* from) { @@ -585,11 +572,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload(const Eigen::half* from) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) { - return combine_half(from[0], from[1]); + return __halves2half2(from[0], from[1]); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half* from) { - return combine_half(from[0], from[0]); + return __halves2half2(from[0], from[0]); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to, @@ -599,8 +586,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to, const half2& from) { - to[0] = get_half2_low(from); - to[1] = get_half2_high(from); + to[0] = __low2half(from); + to[1] = __high2half(from); } @@ -610,7 +597,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned( // Input is guaranteed to be properly aligned. return __ldg(reinterpret_cast(from)); #else - return combine_half(*(from+0), *(from+1)); + return __halves2half2(*(from+0), *(from+1)); #endif } @@ -619,31 +606,31 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned( #if defined(EIGEN_GPU_HAS_LDG) return __halves2half2(__ldg(from+0), __ldg(from+1)); #else - return combine_half(*(from+0), *(from+1)); + return __halves2half2(*(from+0), *(from+1)); #endif } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from, Index stride) { - return combine_half(from[0*stride], from[1*stride]); + return __halves2half2(from[0*stride], from[1*stride]); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter( Eigen::half* to, const half2& from, Index stride) { - to[stride*0] = get_half2_low(from); - to[stride*1] = get_half2_high(from); + to[stride*0] = __low2half(from); + to[stride*1] = __high2half(from); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) { - return get_half2_low(a); + return __low2half(a); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs(const half2& a) { - half a1 = get_half2_low(a); - half a2 = get_half2_high(a); + half a1 = __low2half(a); + half a2 = __high2half(a); half result1 = half_impl::raw_uint16_to_half(a1.x & 0x7FFF); half result2 = half_impl::raw_uint16_to_half(a2.x & 0x7FFF); - return combine_half(result1, result2); + return __halves2half2(result1, result2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ptrue(const half2& /*a*/) { @@ -658,12 +645,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pzero(const half2& /*a*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - __half a1 = get_half2_low(kernel.packet[0]); - __half a2 = get_half2_high(kernel.packet[0]); - __half b1 = get_half2_low(kernel.packet[1]); - __half b2 = get_half2_high(kernel.packet[1]); - kernel.packet[0] = combine_half(a1, b1); - kernel.packet[1] = combine_half(a2, b2); + __half a1 = __low2half(kernel.packet[0]); + __half a2 = __high2half(kernel.packet[0]); + __half b1 = __low2half(kernel.packet[1]); + __half b2 = __high2half(kernel.packet[1]); + kernel.packet[0] = __halves2half2(a1, b1); + kernel.packet[1] = __halves2half2(a2, b2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) { @@ -671,88 +658,101 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) { return __halves2half2(a, __hadd(a, 
__float2half(1.0f))); #else float f = __half2float(a) + 1.0f; - return combine_half(a, __float2half(f)); + return __halves2half2(a, __float2half(f)); #endif } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask, const half2& a, const half2& b) { - half mask_low = get_half2_low(mask); - half mask_high = get_half2_high(mask); - half result_low = mask_low == half(0) ? get_half2_low(b) : get_half2_low(a); - half result_high = mask_high == half(0) ? get_half2_high(b) : get_half2_high(a); - return combine_half(result_low, result_high); + half mask_low = __low2half(mask); + half mask_high = __high2half(mask); + half result_low = mask_low == half(0) ? __low2half(b) : __low2half(a); + half result_high = mask_high == half(0) ? __high2half(b) : __high2half(a); + return __halves2half2(result_low, result_high); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_eq(const half2& a, const half2& b) { half true_half = half_impl::raw_uint16_to_half(0xffffu); half false_half = half_impl::raw_uint16_to_half(0x0000u); - half a1 = get_half2_low(a); - half a2 = get_half2_high(a); - half b1 = get_half2_low(b); - half b2 = get_half2_high(b); + half a1 = __low2half(a); + half a2 = __high2half(a); + half b1 = __low2half(b); + half b2 = __high2half(b); half eq1 = __half2float(a1) == __half2float(b1) ? true_half : false_half; half eq2 = __half2float(a2) == __half2float(b2) ? true_half : false_half; - return combine_half(eq1, eq2); + return __halves2half2(eq1, eq2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_lt(const half2& a, const half2& b) { half true_half = half_impl::raw_uint16_to_half(0xffffu); half false_half = half_impl::raw_uint16_to_half(0x0000u); - half a1 = get_half2_low(a); - half a2 = get_half2_high(a); - half b1 = get_half2_low(b); - half b2 = get_half2_high(b); + half a1 = __low2half(a); + half a2 = __high2half(a); + half b1 = __low2half(b); + half b2 = __high2half(b); half eq1 = __half2float(a1) < __half2float(b1) ? true_half : false_half; half eq2 = __half2float(a2) < __half2float(b2) ? true_half : false_half; - return combine_half(eq1, eq2); + return __halves2half2(eq1, eq2); +} + +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_le(const half2& a, + const half2& b) { + half true_half = half_impl::raw_uint16_to_half(0xffffu); + half false_half = half_impl::raw_uint16_to_half(0x0000u); + half a1 = __low2half(a); + half a2 = __high2half(a); + half b1 = __low2half(b); + half b2 = __high2half(b); + half eq1 = __half2float(a1) <= __half2float(b1) ? true_half : false_half; + half eq2 = __half2float(a2) <= __half2float(b2) ? 
true_half : false_half; + return __halves2half2(eq1, eq2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pand(const half2& a, const half2& b) { - half a1 = get_half2_low(a); - half a2 = get_half2_high(a); - half b1 = get_half2_low(b); - half b2 = get_half2_high(b); + half a1 = __low2half(a); + half a2 = __high2half(a); + half b1 = __low2half(b); + half b2 = __high2half(b); half result1 = half_impl::raw_uint16_to_half(a1.x & b1.x); half result2 = half_impl::raw_uint16_to_half(a2.x & b2.x); - return combine_half(result1, result2); + return __halves2half2(result1, result2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 por(const half2& a, const half2& b) { - half a1 = get_half2_low(a); - half a2 = get_half2_high(a); - half b1 = get_half2_low(b); - half b2 = get_half2_high(b); + half a1 = __low2half(a); + half a2 = __high2half(a); + half b1 = __low2half(b); + half b2 = __high2half(b); half result1 = half_impl::raw_uint16_to_half(a1.x | b1.x); half result2 = half_impl::raw_uint16_to_half(a2.x | b2.x); - return combine_half(result1, result2); + return __halves2half2(result1, result2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pxor(const half2& a, const half2& b) { - half a1 = get_half2_low(a); - half a2 = get_half2_high(a); - half b1 = get_half2_low(b); - half b2 = get_half2_high(b); + half a1 = __low2half(a); + half a2 = __high2half(a); + half b1 = __low2half(b); + half b2 = __high2half(b); half result1 = half_impl::raw_uint16_to_half(a1.x ^ b1.x); half result2 = half_impl::raw_uint16_to_half(a2.x ^ b2.x); - return combine_half(result1, result2); + return __halves2half2(result1, result2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a, const half2& b) { - half a1 = get_half2_low(a); - half a2 = get_half2_high(a); - half b1 = get_half2_low(b); - half b2 = get_half2_high(b); + half a1 = __low2half(a); + half a2 = __high2half(a); + half b1 = __low2half(b); + half b2 = __high2half(b); half result1 = half_impl::raw_uint16_to_half(a1.x & ~b1.x); half result2 = half_impl::raw_uint16_to_half(a2.x & ~b2.x); - return combine_half(result1, result2); + return __halves2half2(result1, result2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a, @@ -851,9 +851,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a, float a2 = __high2float(a); float b1 = __low2float(b); float b2 = __high2float(b); - __half r1 = a1 < b1 ? get_half2_low(a) : get_half2_low(b); - __half r2 = a2 < b2 ? get_half2_high(a) : get_half2_high(b); - return combine_half(r1, r2); + __half r1 = a1 < b1 ? __low2half(a) : __low2half(b); + __half r2 = a2 < b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a, @@ -862,9 +862,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a, float a2 = __high2float(a); float b1 = __low2float(b); float b2 = __high2float(b); - __half r1 = a1 > b1 ? get_half2_low(a) : get_half2_low(b); - __half r2 = a2 > b2 ? get_half2_high(a) : get_half2_high(b); - return combine_half(r1, r2); + __half r1 = a1 > b1 ? __low2half(a) : __low2half(b); + __half r2 = a2 > b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux(const half2& a) { @@ -885,7 +885,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) { #else float a1 = __low2float(a); float a2 = __high2float(a); - return a1 > a2 ? get_half2_low(a) : get_half2_high(a); + return a1 > a2 ? 
__low2half(a) : __high2half(a); #endif } @@ -897,7 +897,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) { #else float a1 = __low2float(a); float a2 = __high2float(a); - return a1 < a2 ? get_half2_low(a) : get_half2_high(a); + return a1 < a2 ? __low2half(a) : __high2half(a); #endif } @@ -1068,10 +1068,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pgather(const Eigen::half* from, Index stride) { Packet4h2 r; half2* p_alias = reinterpret_cast(&r); - p_alias[0] = combine_half(from[0 * stride], from[1 * stride]); - p_alias[1] = combine_half(from[2 * stride], from[3 * stride]); - p_alias[2] = combine_half(from[4 * stride], from[5 * stride]); - p_alias[3] = combine_half(from[6 * stride], from[7 * stride]); + p_alias[0] = __halves2half2(from[0 * stride], from[1 * stride]); + p_alias[1] = __halves2half2(from[2 * stride], from[3 * stride]); + p_alias[2] = __halves2half2(from[4 * stride], from[5 * stride]); + p_alias[3] = __halves2half2(from[6 * stride], from[7 * stride]); return r; } @@ -1152,12 +1152,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half2( EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half(half2& f0, half2& f1) { - __half a1 = get_half2_low(f0); - __half a2 = get_half2_high(f0); - __half b1 = get_half2_low(f1); - __half b2 = get_half2_high(f1); - f0 = combine_half(a1, b1); - f1 = combine_half(a2, b2); + __half a1 = __low2half(f0); + __half a2 = __high2half(f0); + __half b1 = __low2half(f1); + __half b2 = __high2half(f1); + f0 = __halves2half2(a1, b1); + f1 = __halves2half2(a2, b2); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void @@ -1254,10 +1254,10 @@ plset(const Eigen::half& a) { float f = __half2float(a); Packet4h2 r; half2* p_alias = reinterpret_cast(&r); - p_alias[0] = combine_half(a, __float2half(f + 1.0f)); - p_alias[1] = combine_half(__float2half(f + 2.0f), __float2half(f + 3.0f)); - p_alias[2] = combine_half(__float2half(f + 4.0f), __float2half(f + 5.0f)); - p_alias[3] = combine_half(__float2half(f + 6.0f), __float2half(f + 7.0f)); + p_alias[0] = __halves2half2(a, __float2half(f + 1.0f)); + p_alias[1] = __halves2half2(__float2half(f + 2.0f), __float2half(f + 3.0f)); + p_alias[2] = __halves2half2(__float2half(f + 4.0f), __float2half(f + 5.0f)); + p_alias[3] = __halves2half2(__float2half(f + 6.0f), __float2half(f + 7.0f)); return r; #endif } @@ -1292,6 +1292,34 @@ pcmp_eq(const Packet4h2& a, const Packet4h2& b) { return r; } +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 +pcmp_lt(const Packet4h2& a, const Packet4h2& b) { + Packet4h2 r; + half2* r_alias = reinterpret_cast(&r); + const half2* a_alias = reinterpret_cast(&a); + const half2* b_alias = reinterpret_cast(&b); + r_alias[0] = pcmp_lt(a_alias[0], b_alias[0]); + r_alias[1] = pcmp_lt(a_alias[1], b_alias[1]); + r_alias[2] = pcmp_lt(a_alias[2], b_alias[2]); + r_alias[3] = pcmp_lt(a_alias[3], b_alias[3]); + return r; +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 +pcmp_le(const Packet4h2& a, const Packet4h2& b) { + Packet4h2 r; + half2* r_alias = reinterpret_cast(&r); + const half2* a_alias = reinterpret_cast(&a); + const half2* b_alias = reinterpret_cast(&b); + r_alias[0] = pcmp_le(a_alias[0], b_alias[0]); + r_alias[1] = pcmp_le(a_alias[1], b_alias[1]); + r_alias[2] = pcmp_le(a_alias[2], b_alias[2]); + r_alias[3] = pcmp_le(a_alias[3], b_alias[3]); + return r; +} + template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pand( const Packet4h2& a, const Packet4h2& b) { @@ -1477,9 +1505,9 @@ template <> EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Eigen::half predux_max( const Packet4h2& a) { const half2* a_alias = reinterpret_cast(&a); - half2 m0 = combine_half(predux_max(a_alias[0]), + half2 m0 = __halves2half2(predux_max(a_alias[0]), predux_max(a_alias[1])); - half2 m1 = combine_half(predux_max(a_alias[2]), + half2 m1 = __halves2half2(predux_max(a_alias[2]), predux_max(a_alias[3])); __half first = predux_max(m0); __half second = predux_max(m1); @@ -1496,9 +1524,9 @@ template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min( const Packet4h2& a) { const half2* a_alias = reinterpret_cast(&a); - half2 m0 = combine_half(predux_min(a_alias[0]), + half2 m0 = __halves2half2(predux_min(a_alias[0]), predux_min(a_alias[1])); - half2 m1 = combine_half(predux_min(a_alias[2]), + half2 m1 = __halves2half2(predux_min(a_alias[2]), predux_min(a_alias[3])); __half first = predux_min(m0); __half second = predux_min(m1); @@ -1652,9 +1680,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a, float a2 = __high2float(a); float b1 = __low2float(b); float b2 = __high2float(b); - __half r1 = a1 < b1 ? get_half2_low(a) : get_half2_low(b); - __half r2 = a2 < b2 ? get_half2_high(a) : get_half2_high(b); - return combine_half(r1, r2); + __half r1 = a1 < b1 ? __low2half(a) : __low2half(b); + __half r2 = a2 < b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); } template<> @@ -1664,14 +1692,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a, float a2 = __high2float(a); float b1 = __low2float(b); float b2 = __high2float(b); - __half r1 = a1 > b1 ? get_half2_low(a) : get_half2_low(b); - __half r2 = a2 > b2 ? get_half2_high(a) : get_half2_high(b); - return combine_half(r1, r2); + __half r1 = a1 > b1 ? __low2half(a) : __low2half(b); + __half r2 = a2 > b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); } -// #endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) - -#endif // defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) +#endif // (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE) #undef EIGEN_GPU_HAS_LDG #undef EIGEN_CUDA_HAS_FP16_ARITHMETIC diff --git a/libs/eigen/Eigen/src/Core/arch/GPU/Tuple.h b/libs/eigen/Eigen/src/Core/arch/GPU/Tuple.h new file mode 100644 index 0000000..e223ca1 --- /dev/null +++ b/libs/eigen/Eigen/src/Core/arch/GPU/Tuple.h @@ -0,0 +1,302 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2021 The Eigen Team +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TUPLE_GPU +#define EIGEN_TUPLE_GPU + +#include +#include + +// This is a replacement of std::tuple that can be used in device code. + +namespace Eigen { +namespace internal { +namespace tuple_impl { + +// Internal tuple implementation. +template +class TupleImpl; + +// Generic recursive tuple. +template +class TupleImpl { + public: + // Tuple may contain Eigen types. + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + + // Default constructor, enable if all types are default-constructible. + template::value + && reduce_all::value...>::value + >> + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC + TupleImpl() : head_{}, tail_{} {} + + // Element constructor. 
+ template 1 || std::is_convertible::value) + >> + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC + TupleImpl(U1&& arg1, Us&&... args) + : head_(std::forward(arg1)), tail_(std::forward(args)...) {} + + // The first stored value. + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + T1& head() { + return head_; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + const T1& head() const { + return head_; + } + + // The tail values. + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TupleImpl& tail() { + return tail_; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + const TupleImpl& tail() const { + return tail_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void swap(TupleImpl& other) { + using numext::swap; + swap(head_, other.head_); + swap(tail_, other.tail_); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TupleImpl& operator=(const TupleImpl& other) { + head_ = other.head_; + tail_ = other.tail_; + return *this; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TupleImpl& operator=(TupleImpl&& other) { + head_ = std::move(other.head_); + tail_ = std::move(other.tail_); + return *this; + } + + private: + // Allow related tuples to reference head_/tail_. + template + friend class TupleImpl; + + T1 head_; + TupleImpl tail_; +}; + +// Empty tuple specialization. +template<> +class TupleImpl {}; + +template +struct is_tuple : std::false_type {}; + +template +struct is_tuple< TupleImpl > : std::true_type {}; + +// Gets an element from a tuple. +template +struct tuple_get_impl { + using TupleType = TupleImpl; + using ReturnType = typename tuple_get_impl::ReturnType; + + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + ReturnType& run(TupleType& tuple) { + return tuple_get_impl::run(tuple.tail()); + } + + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + const ReturnType& run(const TupleType& tuple) { + return tuple_get_impl::run(tuple.tail()); + } +}; + +// Base case, getting the head element. +template +struct tuple_get_impl<0, T1, Ts...> { + using TupleType = TupleImpl; + using ReturnType = T1; + + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + T1& run(TupleType& tuple) { + return tuple.head(); + } + + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + const T1& run(const TupleType& tuple) { + return tuple.head(); + } +}; + +// Concatenates N Tuples. +template +struct tuple_cat_impl; + +template +struct tuple_cat_impl, TupleImpl, Tuples...> { + using TupleType1 = TupleImpl; + using TupleType2 = TupleImpl; + using MergedTupleType = TupleImpl; + + using ReturnType = typename tuple_cat_impl::ReturnType; + + // Uses the index sequences to extract and merge elements from tuple1 and tuple2, + // then recursively calls again. + template + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ReturnType run(Tuple1&& tuple1, std::index_sequence, + Tuple2&& tuple2, std::index_sequence, + MoreTuples&&... tuples) { + return tuple_cat_impl::run( + MergedTupleType(tuple_get_impl::run(std::forward(tuple1))..., + tuple_get_impl::run(std::forward(tuple2))...), + std::forward(tuples)...); + } + + // Concatenates the first two tuples. + template + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ReturnType run(Tuple1&& tuple1, Tuple2&& tuple2, MoreTuples&&... tuples) { + return run(std::forward(tuple1), std::make_index_sequence{}, + std::forward(tuple2), std::make_index_sequence{}, + std::forward(tuples)...); + } +}; + +// Base case with a single tuple. 
+template +struct tuple_cat_impl<1, TupleImpl > { + using ReturnType = TupleImpl; + + template + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ReturnType run(Tuple1&& tuple1) { + return tuple1; + } +}; + +// Special case of no tuples. +template<> +struct tuple_cat_impl<0> { + using ReturnType = TupleImpl<0>; + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ReturnType run() {return ReturnType{}; } +}; + +// For use in make_tuple, unwraps a reference_wrapper. +template +struct unwrap_reference_wrapper { using type = T; }; + +template +struct unwrap_reference_wrapper > { using type = T&; }; + +// For use in make_tuple, decays a type and unwraps a reference_wrapper. +template +struct unwrap_decay { + using type = typename unwrap_reference_wrapper::type>::type; +}; + +/** + * Utility for determining a tuple's size. + */ +template +struct tuple_size; + +template +struct tuple_size< TupleImpl > : std::integral_constant {}; + +/** + * Gets an element of a tuple. + * \tparam Idx index of the element. + * \tparam Types ... tuple element types. + * \param tuple the tuple. + * \return a reference to the desired element. + */ +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +const typename tuple_get_impl::ReturnType& +get(const TupleImpl& tuple) { + return tuple_get_impl::run(tuple); +} + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +typename tuple_get_impl::ReturnType& +get(TupleImpl& tuple) { + return tuple_get_impl::run(tuple); +} + +/** + * Concatenate multiple tuples. + * \param tuples ... list of tuples. + * \return concatenated tuple. + */ +template::type>::value...>::value>> +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +typename tuple_cat_impl::type...>::ReturnType +tuple_cat(Tuples&&... tuples) { + return tuple_cat_impl::type...>::run(std::forward(tuples)...); +} + +/** + * Tie arguments together into a tuple. + */ +template > +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +ReturnType tie(Args&... args) EIGEN_NOEXCEPT { + return ReturnType{args...}; +} + +/** + * Create a tuple of l-values with the supplied arguments. + */ +template ::type...> > +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +ReturnType make_tuple(Args&&... args) { + return ReturnType{std::forward(args)...}; +} + +/** + * Forward a set of arguments as a tuple. + */ +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +TupleImpl forward_as_tuple(Args&&... args) { + return TupleImpl(std::forward(args)...); +} + +/** + * Alternative to std::tuple that can be used on device. 
+ */ +template +using tuple = TupleImpl; + +} // namespace tuple_impl +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_TUPLE_GPU diff --git a/libs/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h index 7545462..6e8ba27 100644 --- a/libs/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h @@ -10,13 +10,14 @@ #ifndef EIGEN_TYPE_CASTING_GPU_H #define EIGEN_TYPE_CASTING_GPU_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \ - (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE)) - + (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE)) template <> struct type_casting_traits { diff --git a/libs/eigen/Eigen/src/Core/arch/MSA/Complex.h b/libs/eigen/Eigen/src/Core/arch/MSA/Complex.h index 53dacfa..b11a9b4 100644 --- a/libs/eigen/Eigen/src/Core/arch/MSA/Complex.h +++ b/libs/eigen/Eigen/src/Core/arch/MSA/Complex.h @@ -15,6 +15,8 @@ #include +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -75,15 +77,12 @@ struct Packet2cf { EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const { return Packet2cf(*this) -= b; } - EIGEN_STRONG_INLINE Packet2cf& operator/=(const Packet2cf& b) { - *this *= b.conjugate(); - Packet4f s = pmul(b.v, b.v); - s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2))); - v = pdiv(v, s); - return *this; - } EIGEN_STRONG_INLINE Packet2cf operator/(const Packet2cf& b) const { - return Packet2cf(*this) /= b; + return pdiv_complex(Packet2cf(*this), b); + } + EIGEN_STRONG_INLINE Packet2cf& operator/=(const Packet2cf& b) { + *this = Packet2cf(*this) / b; + return *this; } EIGEN_STRONG_INLINE Packet2cf operator-(void) const { return Packet2cf(pnegate(v)); diff --git a/libs/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h index f5181b9..5932041 100644 --- a/libs/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h @@ -26,27 +26,29 @@ #ifndef EIGEN_MATH_FUNCTIONS_MSA_H #define EIGEN_MATH_FUNCTIONS_MSA_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog(const Packet4f& _x) { - static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292e-2f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); - static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); - static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f); + static 
EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292e-2f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); + static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); + static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f); // Convert negative argument into NAN (quiet negative, to be specific). Packet4f zero = (Packet4f)__builtin_msa_ldi_w(0); @@ -119,23 +121,23 @@ plog(const Packet4f& _x) { } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp(const Packet4f& _x) { // Limiting single-precision pexp's argument to [-128, +128] lets pexp // reach 0 and INFINITY naturally. - static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -128.0f); - static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, +128.0f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507e-3f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073e-3f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894e-2f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); - static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f); + static EIGEN_DECLARE_CONST_Packet4f(exp_lo, -128.0f); + static EIGEN_DECLARE_CONST_Packet4f(exp_hi, +128.0f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500e-4f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507e-3f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073e-3f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894e-2f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459e-1f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201e-1f); + static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); + static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f); Packet4f x = _x; @@ -172,23 +174,23 @@ pexp(const Packet4f& _x) { } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh(const Packet4f& _x) { - static _EIGEN_DECLARE_CONST_Packet4f(tanh_tiny, 1e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(tanh_hi, 9.0f); + static EIGEN_DECLARE_CONST_Packet4f(tanh_tiny, 
1e-4f); + static EIGEN_DECLARE_CONST_Packet4f(tanh_hi, 9.0f); // The monomial coefficients of the numerator polynomial (odd). - static _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-3f); - static _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-5f); - static _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-8f); - static _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); - static _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); - static _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); + static EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-3f); + static EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-4f); + static EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-5f); + static EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-8f); + static EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); + static EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); + static EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); // The monomial coefficients of the denominator polynomial (even). - static _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-3f); - static _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-3f); - static _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-6f); + static EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-3f); + static EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-3f); + static EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-4f); + static EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-6f); Packet4f x = pabs(_x); Packet4i tiny_mask = __builtin_msa_fclt_w(x, p4f_tanh_tiny); @@ -229,19 +231,19 @@ ptanh(const Packet4f& _x) { template Packet4f psincos_inner_msa_float(const Packet4f& _x) { - static _EIGEN_DECLARE_CONST_Packet4f(sincos_max_arg, 13176795.0f); // Approx. (2**24) / (4/Pi). - static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f); - static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f); - static _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891e-4f); - static _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736e-3f); - static _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611e-1f); - static _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948e-5f); - static _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765e-3f); - static _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827e-2f); - static _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4/Pi. - static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); - static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f); + static EIGEN_DECLARE_CONST_Packet4f(sincos_max_arg, 13176795.0f); // Approx. (2**24) / (4/Pi). 
+ static EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f); + static EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f); + static EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f); + static EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891e-4f); + static EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736e-3f); + static EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611e-1f); + static EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948e-5f); + static EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765e-3f); + static EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827e-2f); + static EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4/Pi. + static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); + static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f); Packet4f x = pabs(_x); @@ -310,37 +312,37 @@ Packet4f psincos_inner_msa_float(const Packet4f& _x) { } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin(const Packet4f& x) { return psincos_inner_msa_float(x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos(const Packet4f& x) { return psincos_inner_msa_float(x); } template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& _x) { // Limiting double-precision pexp's argument to [-1024, +1024] lets pexp // reach 0 and INFINITY naturally. - static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0); - static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); - static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); - static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5); - static _EIGEN_DECLARE_CONST_Packet2d(1, 1.0); - static _EIGEN_DECLARE_CONST_Packet2d(2, 2.0); + static EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0); + static EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0); + static EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 
2.27265548208155028766e-1); + static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); + static EIGEN_DECLARE_CONST_Packet2d(half, 0.5); + static EIGEN_DECLARE_CONST_Packet2d(1, 1.0); + static EIGEN_DECLARE_CONST_Packet2d(2, 2.0); Packet2d x = _x; diff --git a/libs/eigen/Eigen/src/Core/arch/MSA/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/MSA/PacketMath.h index afe8f33..f03dbed 100644 --- a/libs/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/MSA/PacketMath.h @@ -16,6 +16,8 @@ #include #include +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -51,9 +53,9 @@ typedef v4f32 Packet4f; typedef v4i32 Packet4i; typedef v4u32 Packet4ui; -#define _EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X } -#define _EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X } -#define _EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X } +#define EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X } +#define EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X } +#define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X } inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) { os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]"; @@ -821,9 +823,9 @@ typedef v2f64 Packet2d; typedef v2i64 Packet2l; typedef v2u64 Packet2ul; -#define _EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X } -#define _EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X } -#define _EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X } +#define EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X } +#define EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X } +#define EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X } inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) { os << "[ " << value[0] << ", " << value[1] << " ]"; diff --git a/libs/eigen/Eigen/src/Core/arch/NEON/Complex.h b/libs/eigen/Eigen/src/Core/arch/NEON/Complex.h index f40af7f..008dd7a 100644 --- a/libs/eigen/Eigen/src/Core/arch/NEON/Complex.h +++ b/libs/eigen/Eigen/src/Core/arch/NEON/Complex.h @@ -11,6 +11,8 @@ #ifndef EIGEN_COMPLEX_NEON_H #define EIGEN_COMPLEX_NEON_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -347,27 +349,11 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet1cf pdiv(const Packet1cf& a, const Packet1cf& b) { - // TODO optimize it for NEON - Packet1cf res = pmul(a, pconj(b)); - Packet2f s, rev_s; - - // this computes the norm - s = vmul_f32(b.v, b.v); - rev_s = vrev64_f32(s); - - return Packet1cf(pdiv(res.v, vadd_f32(s, rev_s))); + return pdiv_complex(a, b); } template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for NEON - Packet2cf res = pmul(a,pconj(b)); - Packet4f s, rev_s; - - // this computes the norm - s = vmulq_f32(b.v, b.v); - rev_s = vrev64q_f32(s); - - return Packet2cf(pdiv(res.v, vaddq_f32(s, rev_s))); + return pdiv_complex(a, b); } EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& /*kernel*/) {} @@ -390,7 +376,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf psqrt(const Packet2cf& a) { #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG // 
See bug 1325, clang fails to call vld1q_u64. -#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML +#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML || EIGEN_COMP_CPE static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000}; #else const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 }; @@ -553,12 +539,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { - // TODO optimize it for NEON - Packet1cd res = pmul(a,pconj(b)); - Packet2d s = pmul(b.v, b.v); - Packet2d rev_s = preverse(s); - - return Packet1cd(pdiv(res.v, padd(s,rev_s))); + return pdiv_complex(a, b); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) diff --git a/libs/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h b/libs/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h index 3481f33..b97a090 100644 --- a/libs/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +++ b/libs/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h @@ -1,6 +1,8 @@ +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { - + #if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG // Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm. @@ -41,15 +43,18 @@ struct gebp_traits #if EIGEN_ARCH_ARM64 +#ifndef EIGEN_NEON_GEBP_NR +#define EIGEN_NEON_GEBP_NR 8 +#endif + template<> struct gebp_traits : gebp_traits { typedef float RhsPacket; typedef float32x4_t RhsPacketx4; - - EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const - { + enum { nr = EIGEN_NEON_GEBP_NR }; + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = *b; } @@ -75,7 +80,6 @@ struct gebp_traits { c = vfmaq_n_f32(c, a, b); } - // NOTE: Template parameter inference failed when compiled with Android NDK: // "candidate template ignored: could not match 'FixedInt' against 'Eigen::internal::FixedInt<0>". @@ -92,9 +96,10 @@ struct gebp_traits template EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const { - #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0)) - // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101 - // vfmaq_laneq_f32 is implemented through a costly dup + #if EIGEN_COMP_GNUC_STRICT + // 1. workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101 + // vfmaq_laneq_f32 is implemented through a costly dup, which was fixed in gcc9 + // 2. workaround the gcc register split problem on arm64-neon if(LaneID==0) asm("fmla %0.4s, %1.4s, %2.s[0]\n" : "+w" (c) : "w" (a), "w" (b) : ); else if(LaneID==1) asm("fmla %0.4s, %1.4s, %2.s[1]\n" : "+w" (c) : "w" (a), "w" (b) : ); else if(LaneID==2) asm("fmla %0.4s, %1.4s, %2.s[2]\n" : "+w" (c) : "w" (a), "w" (b) : ); @@ -111,7 +116,7 @@ struct gebp_traits : gebp_traits { typedef double RhsPacket; - + enum { nr = EIGEN_NEON_GEBP_NR }; struct RhsPacketx4 { float64x2_t B_0, B_1; }; @@ -161,9 +166,10 @@ struct gebp_traits template EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const { - #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0)) - // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101 - // vfmaq_laneq_f64 is implemented through a costly dup + #if EIGEN_COMP_GNUC_STRICT + // 1. workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101 + // vfmaq_laneq_f64 is implemented through a costly dup, which was fixed in gcc9 + // 2. 
workaround the gcc register split problem on arm64-neon if(LaneID==0) asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_0) : ); else if(LaneID==1) asm("fmla %0.2d, %1.2d, %2.d[1]\n" : "+w" (c) : "w" (a), "w" (b.B_0) : ); else if(LaneID==2) asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_1) : ); @@ -177,6 +183,73 @@ struct gebp_traits } }; +// The register at operand 3 of fmla for data type half must be v0~v15, but the compiler may not +// allocate a required register for the '%2' of inline asm 'fmla %0.8h, %1.8h, %2.h[id]', +// so inline assembly can't be used here to avoid the bug where vfmaq_lane_f16 is implemented +// through a costly dup in the gcc compiler. +#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG + +template<> +struct gebp_traits + : gebp_traits +{ + typedef half RhsPacket; + typedef float16x4_t RhsPacketx4; + typedef float16x4_t PacketHalf; + enum { nr = EIGEN_NEON_GEBP_NR }; + + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const + { + dest = *b; + } + + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const + { + dest = vld1_f16((const __fp16 *)b); + } + + EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const + { + dest = *b; + } + + EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const + {} + + EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const + { + // If LHS is a Packet8h, we cannot correctly mimic a ploadquad of the RHS + // using a single scalar value. + eigen_assert(false && "Cannot loadRhsQuad for a scalar RHS."); + } + + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const + { + c = vfmaq_n_f16(c, a, b); + } + EIGEN_STRONG_INLINE void madd(const PacketHalf& a, const RhsPacket& b, PacketHalf& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const + { + c = vfma_n_f16(c, a, b); + } + + // NOTE: Template parameter inference failed when compiled with Android NDK: + // "candidate template ignored: could not match 'FixedInt' against 'Eigen::internal::FixedInt<0>".
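// For reference, each madd overload below performs one lane-indexed fused
// multiply-accumulate, c += a * broadcast(b[Lane]). With intrinsics the same
// step reads (illustrative sketch by the editor, not part of the patch):
//
//   template <int Lane>
//   float16x8_t madd_lane(float16x8_t acc, float16x8_t a, float16x4_t b) {
//     return vfmaq_lane_f16(acc, a, b, Lane);
//   }
//
// The fp16 fmla-by-element form needs its lane operand in v0-v15, so the
// intrinsic is used directly here instead of the inline asm seen above.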
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const + { madd_helper<0>(a, b, c); } + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<1>&) const + { madd_helper<1>(a, b, c); } + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<2>&) const + { madd_helper<2>(a, b, c); } + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<3>&) const + { madd_helper<3>(a, b, c); } + private: + template + EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const + { + c = vfmaq_lane_f16(c, a, b, LaneID); + } +}; +#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG #endif // EIGEN_ARCH_ARM64 } // namespace internal diff --git a/libs/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h index fa6615a..aea5149 100644 --- a/libs/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h @@ -8,36 +8,72 @@ #ifndef EIGEN_MATH_FUNCTIONS_NEON_H #define EIGEN_MATH_FUNCTIONS_NEON_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f pexp(const Packet2f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pexp(const Packet2f& x) { return pexp_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pexp(const Packet4f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp(const Packet4f& x) { return pexp_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f plog(const Packet2f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f plog(const Packet2f& x) { return plog_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f plog(const Packet4f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog(const Packet4f& x) { return plog_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f psin(const Packet2f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f psin(const Packet2f& x) { return psin_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psin(const Packet4f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin(const Packet4f& x) { return psin_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f pcos(const Packet2f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pcos(const Packet2f& x) { return pcos_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pcos(const Packet4f& x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos(const Packet4f& x) { return pcos_float(x); } +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pacos(const Packet2f& x) +{ return pacos_float(x); } +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pacos(const Packet4f& x) +{ return pacos_float(x); } + +template<> 
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pasin(const Packet2f& x)
+{ return pasin_float(x); }
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pasin(const Packet4f& x)
+{ return pasin_float(x); }
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patan(const Packet2f& x)
+{ return patan_float(x); }
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patan(const Packet4f& x)
+{ return patan_float(x); }
+
 // Hyperbolic Tangent function.
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f ptanh(const Packet2f& x)
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f ptanh(const Packet2f& x)
{ return internal::generic_fast_tanh_float(x); }
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ptanh(const Packet4f& x)
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh(const Packet4f& x)
{ return internal::generic_fast_tanh_float(x); }
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+Packet4hf ptanh(const Packet4hf& x) {
+  // Convert to float, call the float ptanh, and then convert back.
+  return vcvt_f16_f32(ptanh(vcvt_f32_f16(x)));
+}
+
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+Packet8hf ptanh(const Packet8hf& x) {
+  // Convert each group of 4 halves to float, call the float ptanh, and then convert back.
+  return vcombine_f16(
+    vcvt_f16_f32(ptanh(vcvt_f32_f16(vget_low_f16(x)))),
+    vcvt_f16_f32(ptanh(vcvt_high_f32_f16(x))));
+}
+#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin)
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos)
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog)
@@ -60,12 +96,15 @@ EIGEN_STRONG_INLINE Packet4bf pldexp(const Packet4bf& a, const Packet4bf& expone
//---------- double ----------
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp(const Packet2d& x)
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& x)
{ return pexp_double(x); }
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d plog(const Packet2d& x)
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog(const Packet2d& x)
{ return plog_double(x); }
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d patan(const Packet2d& x)
+{ return patan_double(x); }
+
#endif
} // end namespace internal
diff --git a/libs/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
index d2aeef4..8dd288b 100644
--- a/libs/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/libs/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -12,6 +12,8 @@
#ifndef EIGEN_PACKET_MATH_NEON_H
#define EIGEN_PACKET_MATH_NEON_H
+#include "../../InternalHeaderCheck.h"
+
namespace Eigen {
namespace internal {
@@ -137,13 +139,13 @@ EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f& a, const Packet4f& b
#define vec4f_duplane(a, p) \
 vdupq_lane_f32(vget_low_f32(a), p)
-#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
 const Packet4f p4f_##NAME = pset1(X)
-#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
 const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X))
-#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X)
\ +#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) #if EIGEN_ARCH_ARM64 @@ -155,7 +157,7 @@ EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f& a, const Packet4f& b #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); #elif defined __pld #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) -#elif EIGEN_ARCH_ARM32 +#elif EIGEN_ARCH_ARM #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : ); #else // by default no explicit prefetching @@ -196,6 +198,9 @@ struct packet_traits : default_packet_traits HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasACos = 1, + HasASin = 1, + HasATan = 1, HasLog = 1, HasExp = 1, HasSqrt = 1, @@ -219,6 +224,7 @@ struct packet_traits : default_packet_traits size = 16, HasHalfPacket = 1, + HasCmp = 1, HasAdd = 1, HasSub = 1, HasShift = 1, @@ -248,6 +254,7 @@ struct packet_traits : default_packet_traits size = 16, HasHalfPacket = 1, + HasCmp = 1, HasAdd = 1, HasSub = 1, HasShift = 1, @@ -313,7 +320,7 @@ struct packet_traits : default_packet_traits HasShift = 1, HasMul = 1, HasNegate = 0, - HasAbs = 0, + HasAbs = 1, HasAbsDiff = 1, HasArg = 0, HasAbs2 = 1, @@ -372,7 +379,7 @@ struct packet_traits : default_packet_traits HasShift = 1, HasMul = 1, HasNegate = 0, - HasAbs = 0, + HasAbs = 1, HasArg = 0, HasAbs2 = 1, HasAbsDiff = 1, @@ -434,7 +441,7 @@ struct packet_traits : default_packet_traits HasShift = 1, HasMul = 1, HasNegate = 0, - HasAbs = 0, + HasAbs = 1, HasArg = 0, HasAbs2 = 1, HasAbsDiff = 1, @@ -446,15 +453,6 @@ struct packet_traits : default_packet_traits }; }; -#if EIGEN_GNUC_AT_MOST(4, 4) && !EIGEN_COMP_LLVM -// workaround gcc 4.2, 4.3 and 4.4 compilation issue -EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } -EIGEN_STRONG_INLINE float32x2_t vld1_f32(const float* x) { return ::vld1_f32 ((const float32_t*)x); } -EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32(const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); } -EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); } -EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } -#endif - template<> struct unpacket_traits { typedef float type; @@ -712,9 +710,9 @@ template<> EIGEN_STRONG_INLINE Packet4ui pset1(const uint32_t& from) template<> EIGEN_STRONG_INLINE Packet2l pset1(const int64_t& from) { return vdupq_n_s64(from); } template<> EIGEN_STRONG_INLINE Packet2ul pset1(const uint64_t& from) { return vdupq_n_u64(from); } -template<> EIGEN_STRONG_INLINE Packet2f pset1frombits(unsigned int from) +template<> EIGEN_STRONG_INLINE Packet2f pset1frombits(uint32_t from) { return vreinterpret_f32_u32(vdup_n_u32(from)); } -template<> EIGEN_STRONG_INLINE Packet4f pset1frombits(unsigned int from) +template<> EIGEN_STRONG_INLINE Packet4f pset1frombits(uint32_t from) { return vreinterpretq_f32_u32(vdupq_n_u32(from)); } template<> EIGEN_STRONG_INLINE Packet2f plset(const float& a) @@ -2374,6 +2372,15 @@ template<> EIGEN_STRONG_INLINE Packet2l pabs(const Packet2l& a) { } template<> EIGEN_STRONG_INLINE Packet2ul pabs(const Packet2ul& a) { return a; } +template <> +EIGEN_STRONG_INLINE Packet2f psignbit(const Packet2f& a) { + return vreinterpret_f32_s32(vshr_n_s32(vreinterpret_s32_f32(a), 31)); +} +template <> +EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { + return vreinterpretq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(a), 31)); +} + template<> EIGEN_STRONG_INLINE 
Packet2f pfrexp(const Packet2f& a, Packet2f& exponent) { return pfrexp_generic(a,exponent); } template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) @@ -2384,12 +2391,17 @@ template<> EIGEN_STRONG_INLINE Packet2f pldexp(const Packet2f& a, cons template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_generic(a,exponent); } +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE float predux(const Packet2f& a) { return vaddv_f32(a); } +template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { return vaddvq_f32(a); } +#else template<> EIGEN_STRONG_INLINE float predux(const Packet2f& a) { return vget_lane_f32(vpadd_f32(a,a), 0); } template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { const float32x2_t sum = vadd_f32(vget_low_f32(a), vget_high_f32(a)); return vget_lane_f32(vpadd_f32(sum, sum), 0); } +#endif template<> EIGEN_STRONG_INLINE int8_t predux(const Packet4c& a) { const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(a)); @@ -2397,6 +2409,10 @@ template<> EIGEN_STRONG_INLINE int8_t predux(const Packet4c& a) sum = vpadd_s8(sum, sum); return vget_lane_s8(sum, 0); } +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE int8_t predux(const Packet8c& a) { return vaddv_s8(a); } +template<> EIGEN_STRONG_INLINE int8_t predux(const Packet16c& a) { return vaddvq_s8(a); } +#else template<> EIGEN_STRONG_INLINE int8_t predux(const Packet8c& a) { int8x8_t sum = vpadd_s8(a,a); @@ -2412,6 +2428,7 @@ template<> EIGEN_STRONG_INLINE int8_t predux(const Packet16c& a) sum = vpadd_s8(sum, sum); return vget_lane_s8(sum, 0); } +#endif template<> EIGEN_STRONG_INLINE uint8_t predux(const Packet4uc& a) { const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(a)); @@ -2419,6 +2436,20 @@ template<> EIGEN_STRONG_INLINE uint8_t predux(const Packet4uc& a) sum = vpadd_u8(sum, sum); return vget_lane_u8(sum, 0); } +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE uint8_t predux(const Packet8uc& a) { return vaddv_u8(a); } +template<> EIGEN_STRONG_INLINE uint8_t predux(const Packet16uc& a) { return vaddvq_u8(a); } +template<> EIGEN_STRONG_INLINE int16_t predux(const Packet4s& a) { return vaddv_s16(a); } +template<> EIGEN_STRONG_INLINE int16_t predux(const Packet8s& a) { return vaddvq_s16(a); } +template<> EIGEN_STRONG_INLINE uint16_t predux(const Packet4us& a) { return vaddv_u16(a); } +template<> EIGEN_STRONG_INLINE uint16_t predux(const Packet8us& a) { return vaddvq_u16(a); } +template<> EIGEN_STRONG_INLINE int32_t predux(const Packet2i& a) { return vaddv_s32(a); } +template<> EIGEN_STRONG_INLINE int32_t predux(const Packet4i& a) { return vaddvq_s32(a); } +template<> EIGEN_STRONG_INLINE uint32_t predux(const Packet2ui& a) { return vaddv_u32(a); } +template<> EIGEN_STRONG_INLINE uint32_t predux(const Packet4ui& a) { return vaddvq_u32(a); } +template<> EIGEN_STRONG_INLINE int64_t predux(const Packet2l& a) { return vaddvq_s64(a); } +template<> EIGEN_STRONG_INLINE uint64_t predux(const Packet2ul& a) { return vaddvq_u64(a); } +#else template<> EIGEN_STRONG_INLINE uint8_t predux(const Packet8uc& a) { uint8x8_t sum = vpadd_u8(a,a); @@ -2474,6 +2505,7 @@ template<> EIGEN_STRONG_INLINE int64_t predux(const Packet2l& a) { return vgetq_lane_s64(a, 0) + vgetq_lane_s64(a, 1); } template<> EIGEN_STRONG_INLINE uint64_t predux(const Packet2ul& a) { return vgetq_lane_u64(a, 0) + vgetq_lane_u64(a, 1); } +#endif template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c predux_half_dowto4(const Packet8c& a) { @@ -2574,6 +2606,10 @@ 
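// Annotation (ours, not patch content) on the predux hunks above: on AArch64
// the across-lane reduction instructions collapse the old pairwise ladder into
// a single op. Both of these compute the same horizontal sum (names are ours):
//
//   float predux_pairwise(float32x4_t a) {   // generic ARM path
//     const float32x2_t s = vadd_f32(vget_low_f32(a), vget_high_f32(a));
//     return vget_lane_f32(vpadd_f32(s, s), 0);
//   }
//   float predux_across(float32x4_t a) {     // A64-only path
//     return vaddvq_f32(a);
//   }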
template<> EIGEN_STRONG_INLINE uint64_t predux_mul(const Packet2ul& a { return vgetq_lane_u64(a, 0) * vgetq_lane_u64(a, 1); } // min +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE float predux_min(const Packet2f& a) { return vminv_f32(a); } +template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) { return vminvq_f32(a); } +#else template<> EIGEN_STRONG_INLINE float predux_min(const Packet2f& a) { return vget_lane_f32(vpmin_f32(a,a), 0); } template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) @@ -2581,6 +2617,7 @@ template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) const float32x2_t min = vmin_f32(vget_low_f32(a), vget_high_f32(a)); return vget_lane_f32(vpmin_f32(min, min), 0); } +#endif template<> EIGEN_STRONG_INLINE int8_t predux_min(const Packet4c& a) { const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(a)); @@ -2588,6 +2625,10 @@ template<> EIGEN_STRONG_INLINE int8_t predux_min(const Packet4c& a) min = vpmin_s8(min, min); return vget_lane_s8(min, 0); } +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE int8_t predux_min(const Packet8c& a) { return vminv_s8(a); } +template<> EIGEN_STRONG_INLINE int8_t predux_min(const Packet16c& a) { return vminvq_s8(a); } +#else template<> EIGEN_STRONG_INLINE int8_t predux_min(const Packet8c& a) { int8x8_t min = vpmin_s8(a,a); @@ -2603,6 +2644,7 @@ template<> EIGEN_STRONG_INLINE int8_t predux_min(const Packet16c& a) min = vpmin_s8(min, min); return vget_lane_s8(min, 0); } +#endif template<> EIGEN_STRONG_INLINE uint8_t predux_min(const Packet4uc& a) { const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(a)); @@ -2610,6 +2652,18 @@ template<> EIGEN_STRONG_INLINE uint8_t predux_min(const Packet4uc& a) min = vpmin_u8(min, min); return vget_lane_u8(min, 0); } +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE uint8_t predux_min(const Packet8uc& a) { return vminv_u8(a); } +template<> EIGEN_STRONG_INLINE uint8_t predux_min(const Packet16uc& a) { return vminvq_u8(a); } +template<> EIGEN_STRONG_INLINE int16_t predux_min(const Packet4s& a) { return vminv_s16(a); } +template<> EIGEN_STRONG_INLINE int16_t predux_min(const Packet8s& a) { return vminvq_s16(a); } +template<> EIGEN_STRONG_INLINE uint16_t predux_min(const Packet4us& a) { return vminv_u16(a); } +template<> EIGEN_STRONG_INLINE uint16_t predux_min(const Packet8us& a) { return vminvq_u16(a); } +template<> EIGEN_STRONG_INLINE int32_t predux_min(const Packet2i& a) { return vminv_s32(a); } +template<> EIGEN_STRONG_INLINE int32_t predux_min(const Packet4i& a) { return vminvq_s32(a); } +template<> EIGEN_STRONG_INLINE uint32_t predux_min(const Packet2ui& a) { return vminv_u32(a); } +template<> EIGEN_STRONG_INLINE uint32_t predux_min(const Packet4ui& a) { return vminvq_u32(a); } +#else template<> EIGEN_STRONG_INLINE uint8_t predux_min(const Packet8uc& a) { uint8x8_t min = vpmin_u8(a,a); @@ -2663,12 +2717,17 @@ template<> EIGEN_STRONG_INLINE uint32_t predux_min(const Packet4ui& a const uint32x2_t min = vmin_u32(vget_low_u32(a), vget_high_u32(a)); return vget_lane_u32(vpmin_u32(min, min), 0); } +#endif template<> EIGEN_STRONG_INLINE int64_t predux_min(const Packet2l& a) { return (std::min)(vgetq_lane_s64(a, 0), vgetq_lane_s64(a, 1)); } template<> EIGEN_STRONG_INLINE uint64_t predux_min(const Packet2ul& a) { return (std::min)(vgetq_lane_u64(a, 0), vgetq_lane_u64(a, 1)); } // max +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE float predux_max(const Packet2f& a) { return vmaxv_f32(a); } +template<> EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) { 
return vmaxvq_f32(a); } +#else template<> EIGEN_STRONG_INLINE float predux_max(const Packet2f& a) { return vget_lane_f32(vpmax_f32(a,a), 0); } template<> EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) @@ -2676,6 +2735,7 @@ template<> EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) const float32x2_t max = vmax_f32(vget_low_f32(a), vget_high_f32(a)); return vget_lane_f32(vpmax_f32(max, max), 0); } +#endif template<> EIGEN_STRONG_INLINE int8_t predux_max(const Packet4c& a) { const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(a)); @@ -2683,6 +2743,10 @@ template<> EIGEN_STRONG_INLINE int8_t predux_max(const Packet4c& a) max = vpmax_s8(max, max); return vget_lane_s8(max, 0); } +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE int8_t predux_max(const Packet8c& a) { return vmaxv_s8(a); } +template<> EIGEN_STRONG_INLINE int8_t predux_max(const Packet16c& a) { return vmaxvq_s8(a); } +#else template<> EIGEN_STRONG_INLINE int8_t predux_max(const Packet8c& a) { int8x8_t max = vpmax_s8(a,a); @@ -2698,6 +2762,7 @@ template<> EIGEN_STRONG_INLINE int8_t predux_max(const Packet16c& a) max = vpmax_s8(max, max); return vget_lane_s8(max, 0); } +#endif template<> EIGEN_STRONG_INLINE uint8_t predux_max(const Packet4uc& a) { const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(a)); @@ -2705,6 +2770,18 @@ template<> EIGEN_STRONG_INLINE uint8_t predux_max(const Packet4uc& a) max = vpmax_u8(max, max); return vget_lane_u8(max, 0); } +#if EIGEN_ARCH_ARM64 +template<> EIGEN_STRONG_INLINE uint8_t predux_max(const Packet8uc& a) { return vmaxv_u8(a); } +template<> EIGEN_STRONG_INLINE uint8_t predux_max(const Packet16uc& a) { return vmaxvq_u8(a); } +template<> EIGEN_STRONG_INLINE int16_t predux_max(const Packet4s& a) { return vmaxv_s16(a); } +template<> EIGEN_STRONG_INLINE int16_t predux_max(const Packet8s& a) { return vmaxvq_s16(a); } +template<> EIGEN_STRONG_INLINE uint16_t predux_max(const Packet4us& a) { return vmaxv_u16(a); } +template<> EIGEN_STRONG_INLINE uint16_t predux_max(const Packet8us& a) { return vmaxvq_u16(a); } +template<> EIGEN_STRONG_INLINE int32_t predux_max(const Packet2i& a) { return vmaxv_s32(a); } +template<> EIGEN_STRONG_INLINE int32_t predux_max(const Packet4i& a) { return vmaxvq_s32(a); } +template<> EIGEN_STRONG_INLINE uint32_t predux_max(const Packet2ui& a) { return vmaxv_u32(a); } +template<> EIGEN_STRONG_INLINE uint32_t predux_max(const Packet4ui& a) { return vmaxvq_u32(a); } +#else template<> EIGEN_STRONG_INLINE uint8_t predux_max(const Packet8uc& a) { uint8x8_t max = vpmax_u8(a,a); @@ -2758,6 +2835,7 @@ template<> EIGEN_STRONG_INLINE uint32_t predux_max(const Packet4ui& a const uint32x2_t max = vmax_u32(vget_low_u32(a), vget_high_u32(a)); return vget_lane_u32(vpmax_u32(max, max), 0); } +#endif template<> EIGEN_STRONG_INLINE int64_t predux_max(const Packet2l& a) { return (std::max)(vgetq_lane_s64(a, 0), vgetq_lane_s64(a, 1)); } template<> EIGEN_STRONG_INLINE uint64_t predux_max(const Packet2ul& a) @@ -3274,23 +3352,13 @@ template<> EIGEN_STRONG_INLINE Packet4ui psqrt(const Packet4ui& a) { } template<> EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) { - // Compute approximate reciprocal sqrt. - Packet4f x = vrsqrteq_f32(a); // Do Newton iterations for 1/sqrt(x). 
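// Annotation (ours): the deleted iterations below, like the
// generic_rsqrt_newton_step helper replacing them, refine y ~ 1/sqrt(a) via
// y' = y * (1.5 - 0.5 * a * y * y). vrsqrtsq_f32(x, y) computes (3 - x*y) / 2,
// so feeding it (a*y, y) yields exactly that correction factor:
//
//   float32x4_t rsqrt_one_step(float32x4_t a, float32x4_t y) {  // name is ours
//     return vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, y), y), y);
//   }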
- x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, x), x), x); - x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, x), x), x); - const Packet4f infinity = pset1(NumTraits::infinity()); - return pselect(pcmp_eq(a, pzero(a)), infinity, x); + return generic_rsqrt_newton_step::run(a, vrsqrteq_f32(a)); } template<> EIGEN_STRONG_INLINE Packet2f prsqrt(const Packet2f& a) { // Compute approximate reciprocal sqrt. - Packet2f x = vrsqrte_f32(a); - // Do Newton iterations for 1/sqrt(x). - x = vmul_f32(vrsqrts_f32(vmul_f32(a, x), x), x); - x = vmul_f32(vrsqrts_f32(vmul_f32(a, x), x), x); - const Packet2f infinity = pset1(NumTraits::infinity()); - return pselect(pcmp_eq(a, pzero(a)), infinity, x); + return generic_rsqrt_newton_step::run(a, vrsqrte_f32(a)); } // Unfortunately vsqrt_f32 is only available for A64. @@ -3299,14 +3367,10 @@ template<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& _x){return vsqrtq_ template<> EIGEN_STRONG_INLINE Packet2f psqrt(const Packet2f& _x){return vsqrt_f32(_x); } #else template<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) { - const Packet4f infinity = pset1(NumTraits::infinity()); - const Packet4f is_zero_or_inf = por(pcmp_eq(a, pzero(a)), pcmp_eq(a, infinity)); - return pselect(is_zero_or_inf, a, pmul(a, prsqrt(a))); + return generic_sqrt_newton_step::run(a, prsqrt(a)); } template<> EIGEN_STRONG_INLINE Packet2f psqrt(const Packet2f& a) { - const Packet2f infinity = pset1(NumTraits::infinity()); - const Packet2f is_zero_or_inf = por(pcmp_eq(a, pzero(a)), pcmp_eq(a, infinity)); - return pselect(is_zero_or_inf, a, pmul(a, prsqrt(a))); + return generic_sqrt_newton_step::run(a, prsqrt(a)); } #endif @@ -3386,7 +3450,7 @@ EIGEN_ALWAYS_INLINE void zip_in_place(Packet4bf& p1, Packet4bf& p2) { EIGEN_STRONG_INLINE Packet4bf F32ToBf16(const Packet4f& p) { - // See the scalar implemention in BFloat16.h for a comprehensible explanation + // See the scalar implementation in BFloat16.h for a comprehensible explanation // of this fast rounding algorithm Packet4ui input = reinterpret_cast(p); @@ -3707,10 +3771,13 @@ template<> struct packet_traits : default_packet_traits HasCeil = 1, HasRint = 1, +#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG + HasExp = 1, + HasLog = 1, + HasATan = 1, +#endif HasSin = 0, HasCos = 0, - HasLog = 1, - HasExp = 1, HasSqrt = 1, HasRsqrt = 1, HasTanh = 0, @@ -3846,14 +3913,13 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } -#if EIGEN_COMP_CLANG && defined(__apple_build_version__) -// workaround ICE, see bug 907 +template <> +EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) { + return vreinterpretq_f64_s64(vshrq_n_s64(vreinterpretq_s64_f64(a), 63)); +} + template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) -{ return (vget_low_f64(a) + vget_high_f64(a))[0]; } -#else -template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) -{ return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); } -#endif +{ return vaddvq_f64(a); } // Other reduction functions: // mul @@ -3867,11 +3933,11 @@ template<> EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) // min template<> EIGEN_STRONG_INLINE double predux_min(const Packet2d& a) -{ return vgetq_lane_f64(vpminq_f64(a,a), 0); } +{ return vminvq_f64(a); } // max template<> EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) -{ return vgetq_lane_f64(vpmaxq_f64(a,a), 0); } +{ return vmaxvq_f64(a); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void @@ -3906,20 +3972,12 @@ template<> 
EIGEN_STRONG_INLINE Packet2d pset1frombits(uint64_t from) { return vreinterpretq_f64_u64(vdupq_n_u64(from)); } template<> EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) { - // Compute approximate reciprocal sqrt. - Packet2d x = vrsqrteq_f64(a); // Do Newton iterations for 1/sqrt(x). - x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x); - x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x); - x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x); - const Packet2d infinity = pset1(NumTraits::infinity()); - return pselect(pcmp_eq(a, pzero(a)), infinity, x); + return generic_rsqrt_newton_step::run(a, vrsqrteq_f64(a)); } template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ return vsqrtq_f64(_x); } -#endif // EIGEN_ARCH_ARM64 - // Do we have an fp16 types and supporting Neon intrinsics? #if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC typedef float16x4_t Packet4hf; @@ -3961,6 +4019,7 @@ struct packet_traits : default_packet_traits { HasCos = 0, HasLog = 0, HasExp = 0, + HasTanh = packet_traits::HasTanh, // tanh calls tanh HasSqrt = 1, HasRsqrt = 1, HasErf = EIGEN_FAST_MATH, @@ -4420,11 +4479,21 @@ EIGEN_STRONG_INLINE Packet8hf pabs(const Packet8hf& a) { return vabsq_f16(a); } +template<> +EIGEN_STRONG_INLINE Packet8hf psignbit(const Packet8hf& a) { + return vreinterpretq_f16_s16(vshrq_n_s16(vreinterpretq_s16_f16(a), 15)); +} + template <> EIGEN_STRONG_INLINE Packet4hf pabs(const Packet4hf& a) { return vabs_f16(a); } +template <> +EIGEN_STRONG_INLINE Packet4hf psignbit(const Packet4hf& a) { + return vreinterpret_f16_s16( vshr_n_s16( vreinterpret_s16_f16(a), 15)); +} + template <> EIGEN_STRONG_INLINE Eigen::half predux(const Packet8hf& a) { float16x4_t a_lo, a_hi, sum; @@ -4476,51 +4545,29 @@ EIGEN_STRONG_INLINE Eigen::half predux_mul(const Packet4hf& a) { template <> EIGEN_STRONG_INLINE Eigen::half predux_min(const Packet8hf& a) { - float16x4_t a_lo, a_hi, min; - - a_lo = vget_low_f16(a); - a_hi = vget_high_f16(a); - min = vpmin_f16(a_lo, a_hi); - min = vpmin_f16(min, min); - min = vpmin_f16(min, min); - Eigen::half h; - h.x = vget_lane_f16(min, 0); + h.x = vminvq_f16(a); return h; } template <> EIGEN_STRONG_INLINE Eigen::half predux_min(const Packet4hf& a) { - Packet4hf tmp; - tmp = vpmin_f16(a, a); - tmp = vpmin_f16(tmp, tmp); Eigen::half h; - h.x = vget_lane_f16(tmp, 0); + h.x = vminv_f16(a); return h; } template <> EIGEN_STRONG_INLINE Eigen::half predux_max(const Packet8hf& a) { - float16x4_t a_lo, a_hi, max; - - a_lo = vget_low_f16(a); - a_hi = vget_high_f16(a); - max = vpmax_f16(a_lo, a_hi); - max = vpmax_f16(max, max); - max = vpmax_f16(max, max); - Eigen::half h; - h.x = vget_lane_f16(max, 0); + h.x = vmaxvq_f16(a); return h; } template <> EIGEN_STRONG_INLINE Eigen::half predux_max(const Packet4hf& a) { - Packet4hf tmp; - tmp = vpmax_f16(a, a); - tmp = vpmax_f16(tmp, tmp); Eigen::half h; - h.x = vget_lane_f16(tmp, 0); + h.x = vmaxv_f16(a); return h; } @@ -4580,6 +4627,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock& } #endif // end EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC +#endif // EIGEN_ARCH_ARM64 + } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h index 54f9733..e5ddab6 100644 --- a/libs/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h @@ -11,6 +11,8 @@ #ifndef EIGEN_TYPE_CASTING_NEON_H #define EIGEN_TYPE_CASTING_NEON_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { 
namespace internal {
diff --git a/libs/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h b/libs/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h
new file mode 100644
index 0000000..67f9dcf
--- /dev/null
+++ b/libs/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h
@@ -0,0 +1,63 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NEON_UNARY_FUNCTORS_H
+#define EIGEN_NEON_UNARY_FUNCTORS_H
+
+#include "../../InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+/** \internal
+ * \brief Template specialization of the logistic function for Eigen::half.
+ */
+template <>
+struct scalar_logistic_op<Eigen::half> {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Eigen::half operator()(const Eigen::half& x) const {
+    // Convert to float and call scalar_logistic_op<float>.
+    const scalar_logistic_op<float> float_op;
+    return Eigen::half(float_op(float(x)));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Eigen::half packetOp(const Eigen::half& x) const {
+    return this->operator()(x);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Packet4hf packetOp(const Packet4hf& x) const {
+    const scalar_logistic_op<float> float_op;
+    return vcvt_f16_f32(float_op.packetOp(vcvt_f32_f16(x)));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Packet8hf packetOp(const Packet8hf& x) const {
+    const scalar_logistic_op<float> float_op;
+    return vcombine_f16(
+      vcvt_f16_f32(float_op.packetOp(vcvt_f32_f16(vget_low_f16(x)))),
+      vcvt_f16_f32(float_op.packetOp(vcvt_high_f32_f16(x))));
+  }
+};
+
+template<>
+struct functor_traits<scalar_logistic_op<Eigen::half> > {
+  enum {
+    Cost = functor_traits<scalar_logistic_op<float> >::Cost,
+    PacketAccess = functor_traits<scalar_logistic_op<float> >::PacketAccess,
+  };
+};
+#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_NEON_UNARY_FUNCTORS_H
diff --git a/libs/eigen/Eigen/src/Core/arch/SSE/Complex.h b/libs/eigen/Eigen/src/Core/arch/SSE/Complex.h
index 8fe22da..60308ce 100644
--- a/libs/eigen/Eigen/src/Core/arch/SSE/Complex.h
+++ b/libs/eigen/Eigen/src/Core/arch/SSE/Complex.h
@@ -10,6 +10,8 @@
#ifndef EIGEN_COMPLEX_SSE_H
#define EIGEN_COMPLEX_SSE_H
+#include "../../InternalHeaderCheck.h"
+
namespace Eigen {
namespace internal {
@@ -106,14 +108,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex
template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from)
{
-  Packet2cf res;
-#ifdef EIGEN_VECTORIZE_SSE3
-  res.v = _mm_castpd_ps(_mm_loaddup_pd(reinterpret_cast<const double*>(&from)));
-#else
-  res.v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(&from)));
-  res.v = _mm_movelh_ps(res.v, res.v);
-#endif
-  return res;
+  const float re = std::real(from);
+  const float im = std::imag(from);
+  return Packet2cf(_mm_set_ps(im, re, im, re));
}
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); }
@@ -140,17 +137,9 @@ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::co
template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a)
{
-  #if EIGEN_GNUC_AT_MOST(4,3)
-  // Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares...
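// Annotation (ours, not patch content) on the pset1 rewrite above: _mm_set_ps
// lists lanes from the highest to the lowest, so _mm_set_ps(im, re, im, re)
// stores [re, im, re, im] in memory order, i.e. the complex value duplicated
// across both halves of the register:
//
//   __m128 set1_complex_demo(float re, float im) {  // name is ours
//     return _mm_set_ps(im, re, im, re);  // lane0=re, lane1=im, lane2=re, lane3=im
//   }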
- // This workaround also fix invalid code generation with gcc 4.3 - EIGEN_ALIGN16 std::complex res[2]; - _mm_store_ps((float*)res, a.v); - return res[0]; - #else - std::complex res; + alignas(alignof(__m64)) std::complex res; _mm_storel_pi((__m64*)&res, a.v); return res; - #endif } template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); } @@ -174,14 +163,9 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for SSE3 and 4 - Packet2cf res = pmul(a, pconj(b)); - __m128 s = _mm_mul_ps(b.v,b.v); - return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2)))); + return pdiv_complex(a, b); } - - //---------- double ---------- struct Packet1cd { @@ -299,10 +283,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { - // TODO optimize it for SSE3 and 4 - Packet1cd res = pmul(a,pconj(b)); - __m128d s = _mm_mul_pd(b.v,b.v); - return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); + return pdiv_complex(a, b); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/* */(const Packet1cd& x) diff --git a/libs/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h index 8736d0d..f98fb7a 100644 --- a/libs/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -15,155 +15,123 @@ #ifndef EIGEN_MATH_FUNCTIONS_SSE_H #define EIGEN_MATH_FUNCTIONS_SSE_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog(const Packet4f& _x) { return plog_float(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog(const Packet2d& _x) { return plog_double(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog2(const Packet4f& _x) { return plog2_float(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog2(const Packet2d& _x) { return plog2_double(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog1p(const Packet4f& _x) { return generic_plog1p(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexpm1(const Packet4f& _x) { return generic_expm1(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp(const Packet4f& _x) { return pexp_float(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& x) { return pexp_double(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin(const Packet4f& 
_x) { return psin_float(_x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos(const Packet4f& _x) { return pcos_float(_x); } -#if EIGEN_FAST_MATH - -// Functions for sqrt. -// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step -// of Newton's method, at a cost of 1-2 bits of precision as opposed to the -// exact solution. It does not handle +inf, or denormalized numbers correctly. -// The main advantage of this approach is not just speed, but also the fact that -// it can be inlined and pipelined with other computations, further reducing its -// effective latency. This is similar to Quake3's fast inverse square root. -// For detail see here: http://www.beyond3d.com/content/articles/8/ -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet4f psqrt(const Packet4f& _x) +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f pacos(const Packet4f& _x) { - Packet4f minus_half_x = pmul(_x, pset1(-0.5f)); - Packet4f denormal_mask = pandnot( - pcmp_lt(_x, pset1((std::numeric_limits::min)())), - pcmp_lt(_x, pzero(_x))); - - // Compute approximate reciprocal sqrt. - Packet4f x = _mm_rsqrt_ps(_x); - // Do a single step of Newton's iteration. - x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1(1.5f))); - // Flush results for denormals to zero. - return pandnot(pmul(_x,x), denormal_mask); + return pacos_float(_x); } -#else +template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet2d patan(const Packet2d& _x) { + return patan_double(_x); +} -template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f pasin(const Packet4f& _x) +{ + return pasin_float(_x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f patan(const Packet4f& _x) +{ + return patan_float(_x); +} + +// Notice that for newer processors, it is counterproductive to use Newton +// iteration for square root. In particular, Skylake and Zen2 processors +// have approximately doubled throughput of the _mm_sqrt_ps instruction +// compared to their predecessors. +template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psqrt(const Packet4f& x) { return _mm_sqrt_ps(x); } - -#endif - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d psqrt(const Packet2d& x) { return _mm_sqrt_pd(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16b psqrt(const Packet16b& x) { return x; } #if EIGEN_FAST_MATH - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet4f prsqrt(const Packet4f& _x) { - _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f); - _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f); - _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000u); - _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000u); - - Packet4f neg_half = pmul(_x, p4f_minus_half); - - // Identity infinite, zero, negative and denormal arguments. - Packet4f lt_min_mask = _mm_cmplt_ps(_x, p4f_flt_min); - Packet4f inf_mask = _mm_cmpeq_ps(_x, p4f_inf); - Packet4f not_normal_finite_mask = _mm_or_ps(lt_min_mask, inf_mask); - - // Compute an approximate result using the rsqrt intrinsic. 
- Packet4f y_approx = _mm_rsqrt_ps(_x); - - // Do a single step of Newton-Raphson iteration to improve the approximation. - // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n). - // It is essential to evaluate the inner term like this because forming - // y_n^2 may over- or underflow. - Packet4f y_newton = pmul( - y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p4f_one_point_five)); - - // Select the result of the Newton-Raphson step for positive normal arguments. - // For other arguments, choose the output of the intrinsic. This will - // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(x) = +inf if - // x is zero or a positive denormalized float (equivalent to flushing positive - // denormalized inputs to zero). - return pselect(not_normal_finite_mask, y_approx, y_newton); -} - -#else - +// Even on Skylake, using Newton iteration is a win for reciprocal square root. template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f prsqrt(const Packet4f& x) { - // Unfortunately we can't use the much faster mm_rsqrt_ps since it only provides an approximation. - return _mm_div_ps(pset1(1.0f), _mm_sqrt_ps(x)); + return generic_rsqrt_newton_step::run(x, _mm_rsqrt_ps(x)); } +#ifdef EIGEN_VECTORIZE_FMA +// Trying to speed up reciprocal using Newton-Raphson is counterproductive +// unless FMA is available. Without FMA pdiv(pset1(Scalar(1),a)) is +// 30% faster. +template<> EIGEN_STRONG_INLINE Packet4f preciprocal(const Packet4f& x) { + return generic_reciprocal_newton_step::run(x, _mm_rcp_ps(x)); +} +#endif + #endif -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet2d prsqrt(const Packet2d& x) { - return _mm_div_pd(pset1(1.0), _mm_sqrt_pd(x)); -} + // Hyperbolic Tangent function. template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } diff --git a/libs/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/SSE/PacketMath.h old mode 100755 new mode 100644 index db102c7..a0ff359 --- a/libs/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/SSE/PacketMath.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PACKET_MATH_SSE_H #define EIGEN_PACKET_MATH_SSE_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -30,7 +32,7 @@ namespace internal { #endif #endif -#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX +#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW || EIGEN_COMP_LCC) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // have overloads for both types without linking error. // One solution is to increase ABI version using -fabi-version=4 (or greater). 
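// Annotation (ours) on the preciprocal specialization added in MathFunctions.h
// above: one Newton step for y ~ 1/a is y' = y * (2 - a*y), which roughly
// doubles the number of accurate bits of the _mm_rcp_ps estimate per step:
//
//   __m128 rcp_one_step(__m128 a, __m128 y) {  // name is ours
//     return _mm_mul_ps(y, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(a, y)));
//   }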
@@ -106,16 +108,16 @@ EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d& a, const Packet2d& b #define vec2d_duplane(a,p) \ vec2d_swizzle2(a,a,(p<<1)|p) -#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ const Packet4f p4f_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ const Packet2d p2d_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ const Packet4f p4f_##NAME = pset1frombits(X) -#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) @@ -134,8 +136,12 @@ struct packet_traits : default_packet_traits { HasCmp = 1, HasDiv = 1, + HasReciprocal = EIGEN_FAST_MATH, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasACos = 1, + HasASin = 1, + HasATan = 1, HasLog = 1, HasLog1p = 1, HasExpm1 = 1, @@ -152,7 +158,8 @@ struct packet_traits : default_packet_traits { #ifdef EIGEN_VECTORIZE_SSE4_1 HasRound = 1, #endif - HasRint = 1 + HasRint = 1, + HasSign = 0 // The manually vectorized version is slightly slower for SSE. }; }; template <> @@ -171,6 +178,7 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasATan = 1, HasBlend = 1, HasFloor = 1, HasCeil = 1, @@ -180,7 +188,6 @@ struct packet_traits : default_packet_traits { HasRint = 1 }; }; -#endif template<> struct packet_traits : default_packet_traits { typedef Packet4i type; @@ -188,13 +195,15 @@ template<> struct packet_traits : default_packet_traits enum { Vectorizable = 1, AlignedOnScalar = 1, + HasCmp = 1, + HasDiv=1, size=4, HasShift = 1, HasBlend = 1 }; }; - +#endif template<> struct packet_traits : default_packet_traits { typedef Packet16b type; @@ -204,7 +213,7 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, HasHalfPacket = 0, size=16, - + HasAdd = 1, HasSub = 1, HasShift = 0, @@ -215,7 +224,8 @@ template<> struct packet_traits : default_packet_traits HasMin = 0, HasMax = 0, HasConj = 0, - HasSqrt = 1 + HasSqrt = 1, + HasSign = 0 // Don't try to vectorize psign = identity. }; }; @@ -233,7 +243,7 @@ template<> struct unpacket_traits { template<> struct unpacket_traits { typedef int type; typedef Packet4i half; - enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false, masked_store_available=false}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; template<> struct unpacket_traits { typedef bool type; @@ -246,18 +256,9 @@ template<> struct scalar_div_cost { enum { value = 7 }; }; template<> struct scalar_div_cost { enum { value = 8 }; }; #endif -#if EIGEN_COMP_MSVC==1500 -// Workaround MSVC 9 internal compiler error. -// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode -// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)). 
-template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return _mm_set_ps(from,from,from,from); } -template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return _mm_set_pd(from,from); } -template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return _mm_set_epi32(from,from,from,from); } -#else template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return _mm_set_ps1(from); } template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return _mm_set1_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return _mm_set1_epi32(from); } -#endif template<> EIGEN_STRONG_INLINE Packet16b pset1(const bool& from) { return _mm_set1_epi8(static_cast(from)); } template<> EIGEN_STRONG_INLINE Packet4f pset1frombits(unsigned int from) { return _mm_castsi128_ps(pset1(from)); } @@ -292,6 +293,10 @@ template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet16b padd(const Packet16b& a, const Packet16b& b) { return _mm_or_si128(a,b); } +template EIGEN_STRONG_INLINE Packet padds(const Packet& a, const Packet& b); +template<> EIGEN_STRONG_INLINE Packet4f padds(const Packet4f& a, const Packet4f& b) { return _mm_add_ss(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d padds(const Packet2d& a, const Packet2d& b) { return _mm_add_sd(a,b); } + template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); } @@ -366,11 +371,37 @@ template<> EIGEN_STRONG_INLINE Packet16b pmul(const Packet16b& a, con template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); } +template <> +EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& a, + const Packet4i& b) { +#ifdef EIGEN_VECTORIZE_AVX + return _mm256_cvttpd_epi32( + _mm256_div_pd(_mm256_cvtepi32_pd(a), _mm256_cvtepi32_pd(b))); +#else + __m128i q_lo = _mm_cvttpd_epi32(_mm_div_pd(_mm_cvtepi32_pd(a), _mm_cvtepi32_pd(b))); + __m128i q_hi = + _mm_cvttpd_epi32(_mm_div_pd(_mm_cvtepi32_pd(vec4i_swizzle1(a, 2, 3, 0, 1)), + _mm_cvtepi32_pd(vec4i_swizzle1(b, 2, 3, 0, 1)))); + return vec4i_swizzle1(_mm_unpacklo_epi32(q_lo, q_hi), 0, 2, 1, 3); +#endif +} + + // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } #ifdef EIGEN_VECTORIZE_FMA template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); } template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmsub_ps(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet2d pmsub(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmsub_pd(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fnmadd_ps(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet2d pnmadd(const Packet2d& a, const Packet2d& b, 
const Packet2d& c) { return _mm_fnmadd_pd(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fnmsub_ps(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet2d pnmsub(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fnmsub_pd(a,b,c); } + +template EIGEN_STRONG_INLINE Packet pmadds(const Packet& a, const Packet& b, const Packet& c); +template<> EIGEN_STRONG_INLINE Packet4f pmadds(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ss(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet2d pmadds(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_sd(a,b,c); } #endif #ifdef EIGEN_VECTORIZE_SSE4_1 @@ -444,7 +475,7 @@ template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packe template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return por(pcmp_lt(a,b), pcmp_eq(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 +#if EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 63 // There appears to be a bug in GCC, by which the optimizer may // flip the argument order in calls to _mm_min_ps, so we have to // resort to inline ASM here. This is supposed to be fixed in gcc6.3, @@ -463,7 +494,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const #endif } template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 +#if EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 63 // There appears to be a bug in GCC, by which the optimizer may // flip the argument order in calls to _mm_min_pd, so we have to // resort to inline ASM here. This is supposed to be fixed in gcc6.3, @@ -494,7 +525,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 +#if EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 63 // There appears to be a bug in GCC, by which the optimizer may // flip the argument order in calls to _mm_max_ps, so we have to // resort to inline ASM here. This is supposed to be fixed in gcc6.3, @@ -513,7 +544,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const #endif } template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 +#if EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 63 // There appears to be a bug in GCC, by which the optimizer may // flip the argument order in calls to _mm_max_pd, so we have to // resort to inline ASM here. This is supposed to be fixed in gcc6.3, @@ -618,10 +649,21 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) #endif } +template<> EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31)); } +template<> EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) +{ + Packet4f tmp = psignbit(_mm_castpd_ps(a)); +#ifdef EIGEN_VECTORIZE_AVX + return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask))); +#else + return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask))); +#endif // EIGEN_VECTORIZE_AVX +} + #ifdef EIGEN_VECTORIZE_SSE4_1 template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { - // Unfortunatly _mm_round_ps doesn't have a rounding mode to implement numext::round. 
+ // Unfortunately _mm_round_ps doesn't have a rounding mode to implement numext::round. const Packet4f mask = pset1frombits(0x80000000u); const Packet4f prev0dot5 = pset1frombits(0x3EFFFFFFu); return _mm_round_ps(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO); @@ -720,15 +762,7 @@ template<> EIGEN_STRONG_INLINE Packet16b pload(const bool* from) #if EIGEN_COMP_MSVC template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD - #if (EIGEN_COMP_MSVC==1600) - // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps - // (i.e., it does not generate an unaligned load!! - __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from)); - res = _mm_loadh_pi(res, (const __m64*)(from+2)); - return res; - #else return _mm_loadu_ps(from); - #endif } #else // NOTE: with the code below, MSVC's compiler crashes! @@ -755,6 +789,15 @@ template<> EIGEN_STRONG_INLINE Packet16b ploadu(const bool* from) return _mm_loadu_si128(reinterpret_cast(from)); } +// Load lower part of packet zero extending. +template EIGEN_STRONG_INLINE Packet ploadl(const typename unpacket_traits::type* from); +template<> EIGEN_STRONG_INLINE Packet4f ploadl(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(from))); } +template<> EIGEN_STRONG_INLINE Packet2d ploadl(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_load_sd(from); } + +// Load scalar +template EIGEN_STRONG_INLINE Packet ploads(const typename unpacket_traits::type* from); +template<> EIGEN_STRONG_INLINE Packet4f ploads(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_load_ss(from); } +template<> EIGEN_STRONG_INLINE Packet2d ploads(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_load_sd(from); } template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { @@ -796,6 +839,14 @@ template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } template<> EIGEN_STRONG_INLINE void pstoreu(bool* to, const Packet16b& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } +template EIGEN_STRONG_INLINE void pstorel(Scalar* to, const Packet& from); +template<> EIGEN_STRONG_INLINE void pstorel(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storel_pi(reinterpret_cast<__m64*>(to), from); } +template<> EIGEN_STRONG_INLINE void pstorel(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storel_pd(to, from); } + +template EIGEN_STRONG_INLINE void pstores(Scalar* to, const Packet& from); +template<> EIGEN_STRONG_INLINE void pstores(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_store_ss(to, from); } +template<> EIGEN_STRONG_INLINE void pstores(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_store_sd(to, from); } + template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); @@ -1120,6 +1171,11 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) return _mm_movemask_ps(x) != 0x0; } +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x) +{ + return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0; +} + EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], 
kernel.packet[2], kernel.packet[3]); @@ -1278,8 +1334,126 @@ template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) { return ::fma(a,b,c); } +template<> EIGEN_STRONG_INLINE float pmsub(const float& a, const float& b, const float& c) { + return ::fmaf(a,b,-c); +} +template<> EIGEN_STRONG_INLINE double pmsub(const double& a, const double& b, const double& c) { + return ::fma(a,b,-c); +} +template<> EIGEN_STRONG_INLINE float pnmadd(const float& a, const float& b, const float& c) { + return ::fmaf(-a,b,c); +} +template<> EIGEN_STRONG_INLINE double pnmadd(const double& a, const double& b, const double& c) { + return ::fma(-a,b,c); +} +template<> EIGEN_STRONG_INLINE float pnmsub(const float& a, const float& b, const float& c) { + return ::fmaf(-a,b,-c); +} +template<> EIGEN_STRONG_INLINE double pnmsub(const double& a, const double& b, const double& c) { + return ::fma(-a,b,-c); +} #endif +#ifdef EIGEN_VECTORIZE_SSE4_1 +// Helpers for half->float and float->half conversions. +// Currently only used by the AVX code. +EIGEN_STRONG_INLINE __m128i half2floatsse(__m128i h) { + __m128i input = _mm_cvtepu16_epi32(h); + + // Direct vectorization of half_to_float, C parts in the comments. + __m128i shifted_exp = _mm_set1_epi32(0x7c00 << 13); + // o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits + __m128i ou = _mm_slli_epi32(_mm_and_si128(input, _mm_set1_epi32(0x7fff)), 13); + // exp = shifted_exp & o.u; // just the exponent + __m128i exp = _mm_and_si128(ou, shifted_exp); + // o.u += (127 - 15) << 23; + ou = _mm_add_epi32(ou, _mm_set1_epi32((127 - 15) << 23)); + + // Inf/NaN? + __m128i naninf_mask = _mm_cmpeq_epi32(exp, shifted_exp); + // Inf/NaN adjust + __m128i naninf_adj = + _mm_and_si128(_mm_set1_epi32((128 - 16) << 23), naninf_mask); + // extra exp adjust for Inf/NaN + ou = _mm_add_epi32(ou, naninf_adj); + + // Zero/Denormal? + __m128i zeroden_mask = _mm_cmpeq_epi32(exp, _mm_setzero_si128()); + __m128i zeroden_adj = _mm_and_si128(zeroden_mask, _mm_set1_epi32(1 << 23)); + // o.u += 1 << 23; + ou = _mm_add_epi32(ou, zeroden_adj); + // magic.u = 113 << 23 + __m128i magic = _mm_and_si128(zeroden_mask, _mm_set1_epi32(113 << 23)); + // o.f -= magic.f + ou = _mm_castps_si128( + _mm_sub_ps(_mm_castsi128_ps(ou), _mm_castsi128_ps(magic))); + + __m128i sign = + _mm_slli_epi32(_mm_and_si128(input, _mm_set1_epi32(0x8000)), 16); + // o.u |= (h.x & 0x8000) << 16; // sign bit + ou = _mm_or_si128(ou, sign); + // return o.f; + // We are actually returning uint version, to make + // _mm256_insertf128_si256 work. 
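// Annotation (ours, not patch content): a scalar mirror of the lane-wise code
// above, handy for checking one value at a time. std::memcpy is used for the
// type pun; the magic constants are the same as in the vector code.
//
//   float half_to_float_scalar(uint16_t h) {  // name is ours
//     uint32_t o = uint32_t(h & 0x7fffu) << 13;       // exponent/mantissa bits
//     const uint32_t exp = o & (0x7c00u << 13);       // just the exponent
//     o += uint32_t(127 - 15) << 23;                  // exponent rebias
//     if (exp == (0x7c00u << 13)) {
//       o += uint32_t(128 - 16) << 23;                // Inf/NaN: extra adjust
//     } else if (exp == 0) {                          // zero/denormal
//       o += 1u << 23;
//       float f; std::memcpy(&f, &o, 4);
//       f -= 6.103515625e-05f;                        // (113 << 23) reinterpreted as float
//       std::memcpy(&o, &f, 4);
//     }
//     o |= uint32_t(h & 0x8000u) << 16;               // sign bit
//     float out; std::memcpy(&out, &o, 4);
//     return out;
//   }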
+ return ou; +} + +EIGEN_STRONG_INLINE __m128i float2half(__m128 f) { + __m128i o = _mm_setzero_si128(); + + // unsigned int sign_mask = 0x80000000u; + __m128i sign = _mm_set1_epi32(0x80000000u); + // unsigned int sign = f.u & sign_mask; + sign = _mm_and_si128(sign, _mm_castps_si128(f)); + // f.u ^= sign; + f = _mm_xor_ps(f, _mm_castsi128_ps(sign)); + + __m128i fu = _mm_castps_si128(f); + + __m128i f16max = _mm_set1_epi32((127 + 16) << 23); + __m128i f32infty = _mm_set1_epi32(255 << 23); + // if (f.u >= f16max.u) // result is Inf or NaN (all exponent bits set) + // there is no _mm_cmpge_epi32, so use lt and swap operands + __m128i infnan_mask = _mm_cmplt_epi32(f16max, _mm_castps_si128(f)); + __m128i inf_mask = _mm_cmpgt_epi32(_mm_castps_si128(f), f32infty); + __m128i nan_mask = _mm_andnot_si128(inf_mask, infnan_mask); + __m128i inf_value = _mm_and_si128(inf_mask, _mm_set1_epi32(0x7e00)); + __m128i nan_value = _mm_and_si128(nan_mask, _mm_set1_epi32(0x7c00)); + // o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf + __m128i naninf_value = _mm_or_si128(inf_value, nan_value); + + __m128i denorm_magic = _mm_set1_epi32(((127 - 15) + (23 - 10) + 1) << 23); + __m128i subnorm_mask = + _mm_cmplt_epi32(_mm_castps_si128(f), _mm_set1_epi32(113 << 23)); + // f.f += denorm_magic.f; + f = _mm_add_ps(f, _mm_castsi128_ps(denorm_magic)); + // f.u - denorm_magic.u + o = _mm_sub_epi32(_mm_castps_si128(f), denorm_magic); + o = _mm_and_si128(o, subnorm_mask); + // Correct result for inf/nan/zero/subnormal, 0 otherwise + o = _mm_or_si128(o, naninf_value); + + __m128i mask = _mm_or_si128(infnan_mask, subnorm_mask); + o = _mm_and_si128(o, mask); + + // mant_odd = (f.u >> 13) & 1; + __m128i mand_odd = _mm_and_si128(_mm_srli_epi32(fu, 13), _mm_set1_epi32(0x1)); + // f.u += 0xc8000fffU; + fu = _mm_add_epi32(fu, _mm_set1_epi32(0xc8000fffU)); + // f.u += mant_odd; + fu = _mm_add_epi32(fu, mand_odd); + fu = _mm_andnot_si128(mask, fu); + // f.u >> 13 + fu = _mm_srli_epi32(fu, 13); + o = _mm_or_si128(fu, o); + + // o.x |= static_cast(sign >> 16); + o = _mm_or_si128(o, _mm_srli_epi32(sign, 16)); + + // 16 bit values + return _mm_and_si128(o, _mm_set1_epi32(0xffff)); +} +#endif // Packet math for Eigen::half // Disable the following code since it's broken on too many platforms / compilers. 
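// Annotation (ours) on float2half above: the 0xc8000fff increment is the
// exponent rebias (15 - 127) << 23 taken mod 2^32, plus 0xfff of rounding
// bias; adding the mantissa-odd bit on top makes the final >>13 truncation
// round to nearest-even. Normal-range sketch (name is ours):
//
//   uint16_t round_and_rebias(uint32_t fu) {      // fu: float bits, normal range
//     const uint32_t mant_odd = (fu >> 13) & 1;   // ties-to-even bit
//     fu += 0xc8000fffu + mant_odd;               // rebias + round bias
//     return uint16_t(fu >> 13);                  // exponent + 10-bit mantissa
//   }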
diff --git a/libs/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h index d2a0037..a6346ea 100644 --- a/libs/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TYPE_CASTING_SSE_H #define EIGEN_TYPE_CASTING_SSE_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -69,6 +71,14 @@ template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4f return _mm_cvtps_pd(a); } +template<> EIGEN_STRONG_INLINE Packet2d preinterpret(const Packet4f& a) { + return _mm_castps_pd(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f preinterpret(const Packet2d& a) { + return _mm_castpd_ps(a); +} + template<> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet4f& a) { return _mm_castps_si128(a); } diff --git a/libs/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h index b139ea2..8b588b1 100644 --- a/libs/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h @@ -10,32 +10,34 @@ #ifndef EIGEN_MATH_FUNCTIONS_SVE_H #define EIGEN_MATH_FUNCTIONS_SVE_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { template <> -EIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf pexp(const PacketXf& x) { +EIGEN_STRONG_INLINE PacketXf pexp(const PacketXf& x) { return pexp_float(x); } template <> -EIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf plog(const PacketXf& x) { +EIGEN_STRONG_INLINE PacketXf plog(const PacketXf& x) { return plog_float(x); } template <> -EIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf psin(const PacketXf& x) { +EIGEN_STRONG_INLINE PacketXf psin(const PacketXf& x) { return psin_float(x); } template <> -EIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf pcos(const PacketXf& x) { +EIGEN_STRONG_INLINE PacketXf pcos(const PacketXf& x) { return pcos_float(x); } // Hyperbolic Tangent function. 
template <> -EIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf ptanh(const PacketXf& x) { +EIGEN_STRONG_INLINE PacketXf ptanh(const PacketXf& x) { return internal::generic_fast_tanh_float(x); } } // end namespace internal diff --git a/libs/eigen/Eigen/src/Core/arch/SVE/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/SVE/PacketMath.h index 9060b37..9c106b3 100644 --- a/libs/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/SVE/PacketMath.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PACKET_MATH_SVE_H #define EIGEN_PACKET_MATH_SVE_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal @@ -149,7 +151,7 @@ EIGEN_STRONG_INLINE PacketXi pmax(const PacketXi& a, const PacketXi& b template <> EIGEN_STRONG_INLINE PacketXi pcmp_le(const PacketXi& a, const PacketXi& b) { - return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu); + return svdup_n_s32_z(svcmple_s32(svptrue_b32(), a, b), 0xffffffffu); } template <> @@ -209,13 +211,13 @@ EIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a) template EIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a) { - return svreinterpret_s32_u32(svlsr_u32_z(svptrue_b32(), svreinterpret_u32_s32(a), svdup_n_u32_z(svptrue_b32(), N))); + return svreinterpret_s32_u32(svlsr_n_u32_z(svptrue_b32(), svreinterpret_u32_s32(a), N)); } template EIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a) { - return svlsl_s32_z(svptrue_b32(), a, svdup_n_u32_z(svptrue_b32(), N)); + return svlsl_n_s32_z(svptrue_b32(), a, N); } template <> @@ -523,7 +525,7 @@ EIGEN_STRONG_INLINE PacketXf pmax(const PacketXf& a, template <> EIGEN_STRONG_INLINE PacketXf pcmp_le(const PacketXf& a, const PacketXf& b) { - return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu)); + return svreinterpret_f32_u32(svdup_n_u32_z(svcmple_f32(svptrue_b32(), a, b), 0xffffffffu)); } template <> diff --git a/libs/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h index 7ba5d9c..1067a41 100644 --- a/libs/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TYPE_CASTING_SVE_H #define EIGEN_TYPE_CASTING_SVE_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/libs/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h index 10856ff..57bfb69 100644 --- a/libs/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +++ b/libs/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -21,6 +21,8 @@ #ifndef EIGEN_INTEROP_HEADERS_SYCL_H #define EIGEN_INTEROP_HEADERS_SYCL_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { #if !defined(EIGEN_DONT_VECTORIZE_SYCL) diff --git a/libs/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h index 2ab0f2a..9eb46bb 100644 --- a/libs/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h @@ -20,6 +20,8 @@ #ifndef EIGEN_MATH_FUNCTIONS_SYCL_H #define EIGEN_MATH_FUNCTIONS_SYCL_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h index 87badc0..5bc3235 100644 --- a/libs/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h @@ -21,6 +21,8 @@ #ifndef EIGEN_PACKET_MATH_SYCL_H #define EIGEN_PACKET_MATH_SYCL_H 
#include +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -475,25 +477,19 @@ pabs(const cl::sycl::cl_double2& a) { template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a, const Packet &b) { - return ((a <= b) - .template convert::type, - cl::sycl::rounding_mode::automatic>()); + return (a <= b).template as(); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a, const Packet &b) { - return ((a < b) - .template convert::type, - cl::sycl::rounding_mode::automatic>()); + return (a < b).template as(); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a, const Packet &b) { - return ((a == b) - .template convert::type, - cl::sycl::rounding_mode::automatic>()); + return (a == b).template as(); } #define SYCL_PCMP(OP, TYPE) \ @@ -511,76 +507,6 @@ SYCL_PCMP(lt, cl::sycl::cl_double2) SYCL_PCMP(eq, cl::sycl::cl_double2) #undef SYCL_PCMP -template struct convert_to_integer; - -template <> struct convert_to_integer { - using type = std::int32_t; - using packet_type = cl::sycl::cl_int4; -}; -template <> struct convert_to_integer { - using type = std::int64_t; - using packet_type = cl::sycl::cl_long2; -}; - -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer< - typename unpacket_traits::type>::packet_type -vector_as_int(const PacketIn &p) { - return ( - p.template convert::type>::type, - cl::sycl::rounding_mode::automatic>()); -} - -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut -convert_vector(const PacketIn &p) { - return (p.template convert::type, - cl::sycl::rounding_mode::automatic>()); -} - -#define SYCL_PAND(TYPE) \ - template <> \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand(const TYPE &a, \ - const TYPE &b) { \ - return convert_vector(vector_as_int(a) & vector_as_int(b)); \ - } -SYCL_PAND(cl::sycl::cl_float4) -SYCL_PAND(cl::sycl::cl_double2) -#undef SYCL_PAND - -#define SYCL_POR(TYPE) \ - template <> \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por(const TYPE &a, \ - const TYPE &b) { \ - return convert_vector(vector_as_int(a) | vector_as_int(b)); \ - } - -SYCL_POR(cl::sycl::cl_float4) -SYCL_POR(cl::sycl::cl_double2) -#undef SYCL_POR - -#define SYCL_PXOR(TYPE) \ - template <> \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor(const TYPE &a, \ - const TYPE &b) { \ - return convert_vector(vector_as_int(a) ^ vector_as_int(b)); \ - } - -SYCL_PXOR(cl::sycl::cl_float4) -SYCL_PXOR(cl::sycl::cl_double2) -#undef SYCL_PXOR - -#define SYCL_PANDNOT(TYPE) \ - template <> \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot(const TYPE &a, \ - const TYPE &b) { \ - return convert_vector(vector_as_int(a) & (~vector_as_int(b))); \ - } -SYCL_PANDNOT(cl::sycl::cl_float4) -SYCL_PANDNOT(cl::sycl::cl_double2) -#undef SYCL_PANDNOT - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( PacketBlock& kernel) { float tmp = kernel.packet[0].y(); diff --git a/libs/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h b/libs/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h index f81e59d..54eedfa 100644 --- a/libs/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +++ b/libs/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h @@ -33,6 +33,8 @@ #include #include +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace TensorSycl { namespace internal { @@ -139,7 +141,7 @@ class PointerMapper { /* basic type for all buffers */ - using buffer_t = cl::sycl::buffer_mem; + using buffer_t = cl::sycl::buffer; /** * Node that stores information about a device 
allocation. @@ -166,7 +168,7 @@ class PointerMapper { /** * Obtain the insertion point in the pointer map for * a pointer of the given size. - * \param requiredSize Size attemted to reclaim + * \param requiredSize Size attempted to reclaim */ typename pointerMap_t::iterator get_insertion_point(size_t requiredSize) { typename pointerMap_t::iterator retVal; @@ -235,17 +237,14 @@ class PointerMapper { template cl::sycl::buffer get_buffer( const virtual_pointer_t ptr) { - using sycl_buffer_t = cl::sycl::buffer; - // get_node() returns a `buffer_mem`, so we need to cast it to a `buffer<>`. - // We can do this without the `buffer_mem` being a pointer, as we - // only declare member variables in the base class (`buffer_mem`) and not in - // the child class (`buffer<>). auto node = get_node(ptr); + auto& map_node = node->second; eigen_assert(node->first == ptr || node->first < ptr); - eigen_assert(ptr < static_cast(node->second.m_size + + eigen_assert(ptr < static_cast(map_node.m_size + node->first)); - return *(static_cast(&node->second.m_buffer)); + return map_node.m_buffer.reinterpret( + cl::sycl::range<1>{map_node.m_size / sizeof(buffer_data_type)}); } /** @@ -427,8 +426,11 @@ class PointerMapper { template virtual_pointer_t add_pointer_impl(BufferT b) { virtual_pointer_t retVal = nullptr; - size_t bufSize = b.get_count(); - pMapNode_t p{b, bufSize, false}; + size_t bufSize = b.get_count() * sizeof(buffer_data_type_t); + auto byte_buffer = + b.template reinterpret(cl::sycl::range<1>{bufSize}); + pMapNode_t p{byte_buffer, bufSize, false}; + // If this is the first pointer: if (m_pointerMap.empty()) { virtual_pointer_t initialVal{m_baseAddress}; diff --git a/libs/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h b/libs/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h index 9208ab2..613e823 100644 --- a/libs/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +++ b/libs/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h @@ -21,6 +21,8 @@ #ifndef EIGEN_TYPE_CASTING_SYCL_H #define EIGEN_TYPE_CASTING_SYCL_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/arch/ZVector/Complex.h b/libs/eigen/Eigen/src/Core/arch/ZVector/Complex.h index 0b9b33d..df5c8d4 100644 --- a/libs/eigen/Eigen/src/Core/arch/ZVector/Complex.h +++ b/libs/eigen/Eigen/src/Core/arch/ZVector/Complex.h @@ -8,8 +8,10 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#ifndef EIGEN_COMPLEX32_ALTIVEC_H -#define EIGEN_COMPLEX32_ALTIVEC_H +#ifndef EIGEN_COMPLEX32_ZVECTOR_H +#define EIGEN_COMPLEX32_ZVECTOR_H + +#include "../../InternalHeaderCheck.h" namespace Eigen { @@ -91,8 +93,18 @@ template<> struct packet_traits > : default_packet_traits }; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; +template<> struct unpacket_traits { + typedef std::complex type; + enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; + typedef Packet2cf half; + typedef Packet4f as_real; +}; +template<> struct unpacket_traits { + typedef std::complex type; + enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; + typedef Packet1cd half; + typedef Packet2d as_real; +}; /* Forward declaration */ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel); @@ -150,7 +162,7 @@ template<> EIGEN_STRONG_INLINE void prefetch >(const std::c template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { - std::complex EIGEN_ALIGN16 res; + EIGEN_ALIGN16 std::complex res; pstore >(&res, a); return res; @@ -169,10 +181,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { - // TODO optimize it for AltiVec - Packet1cd res = pmul(a,pconj(b)); - Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); - return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64))); + return pdiv_complex(a, b); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) @@ -195,7 +204,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { - std::complex EIGEN_ALIGN16 res[2]; + EIGEN_ALIGN16 std::complex res[2]; pstore >(res, a); return res[0]; @@ -225,14 +234,14 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, Index stride) { - std::complex EIGEN_ALIGN16 af[2]; + EIGEN_ALIGN16 std::complex af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return pload(af); } template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride) { - std::complex EIGEN_ALIGN16 af[2]; + EIGEN_ALIGN16 std::complex af[2]; pstore >((std::complex *) af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; @@ -308,11 +317,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for AltiVec - Packet2cf res; - res.cd[0] = pdiv(a.cd[0], b.cd[0]); - res.cd[1] = pdiv(a.cd[1], b.cd[1]); - return res; + return pdiv_complex(a, b); } EIGEN_STRONG_INLINE Packet2cf pcplxflip/**/(const Packet2cf& x) @@ -394,10 +399,7 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for AltiVec - Packet2cf res = pmul(a, pconj(b)); - Packet4f s = pmul(b.v, b.v); - return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); + return 
pdiv_complex(a, b); } template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) @@ -423,4 +425,4 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con } // end namespace Eigen -#endif // EIGEN_COMPLEX32_ALTIVEC_H +#endif // EIGEN_COMPLEX32_ZVECTOR_H diff --git a/libs/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h b/libs/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h index 1635e12..1f2da26 100644 --- a/libs/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +++ b/libs/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h @@ -13,79 +13,81 @@ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ -#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H -#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H +#ifndef EIGEN_MATH_FUNCTIONS_ZVECTOR_H +#define EIGEN_MATH_FUNCTIONS_ZVECTOR_H + +#include "../../InternalHeaderCheck.h" namespace Eigen { namespace internal { #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) -static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); -static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); -static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); -static _EIGEN_DECLARE_CONST_Packet4i(23, 23); +static EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); +static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); +static EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); +static EIGEN_DECLARE_CONST_Packet4i(23, 23); -static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); +static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); /* the smallest non denormalized float number */ -static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); -static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f -static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff); +static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); +static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f +static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff); /* natural logarithm computed for 4 simultaneous float return NaN for x <= 0 */ -static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 
2.0000714765E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); -static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f); -static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); +static EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f); +static EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); -static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); +static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); #endif -static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0); -static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0); -static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5); +static EIGEN_DECLARE_CONST_Packet2d(1 , 1.0); +static EIGEN_DECLARE_CONST_Packet2d(2 , 2.0); +static EIGEN_DECLARE_CONST_Packet2d(half, 0.5); -static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437); -static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303); +static EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437); +static EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); +static EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); +static 
EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); -static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); +static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& _x) { Packet2d x = _x; @@ -136,7 +138,7 @@ Packet2d pexp(const Packet2d& _x) isnumber_mask); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp(const Packet4f& _x) { #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) @@ -183,13 +185,13 @@ Packet4f pexp(const Packet4f& _x) #endif } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d psqrt(const Packet2d& x) { return vec_sqrt(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psqrt(const Packet4f& x) { Packet4f res; @@ -202,12 +204,12 @@ Packet4f psqrt(const Packet4f& x) return res; } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d prsqrt(const Packet2d& x) { return pset1(1.0) / psqrt(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f prsqrt(const Packet4f& x) { Packet4f res; #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) @@ -221,7 +223,7 @@ Packet4f prsqrt(const Packet4f& x) { // Hyperbolic Tangent function. 
template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } @@ -230,4 +232,4 @@ ptanh(const Packet4f& x) { } // end namespace Eigen -#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H +#endif // EIGEN_MATH_FUNCTIONS_ZVECTOR_H diff --git a/libs/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h b/libs/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h old mode 100755 new mode 100644 index 1f55a90..26b6f0d --- a/libs/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/libs/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -10,6 +10,8 @@ #ifndef EIGEN_PACKET_MATH_ZVECTOR_H #define EIGEN_PACKET_MATH_ZVECTOR_H +#include "../../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -64,48 +66,48 @@ typedef union { // We don't want to write the same code all the time, but we need to reuse the constants // and it doesn't really work to declare them global, so we define macros instead -#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = reinterpret_cast(vec_splat_s32(X)) -#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \ Packet2d p2d_##NAME = reinterpret_cast(vec_splat_s64(X)) -#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \ Packet2l p2l_##NAME = reinterpret_cast(vec_splat_s64(X)) -#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ Packet2d p2d_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ Packet2l p2l_##NAME = pset1(X) // These constants are endian-agnostic -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1} +static EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} +static EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1} -static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0); -static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0); -static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1); +static EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0); +static EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0); +static EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1); static Packet2d p2d_ONE = { 1.0, 1.0 }; -static Packet2d p2d_ZERO_ = { numext::bit_cast0x8000000000000000ull), - numext::bit_cast0x8000000000000000ull) }; +static Packet2d p2d_ZERO_ = { numext::bit_cast(0x8000000000000000ull), + numext::bit_cast(0x8000000000000000ull) }; #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) -#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ +#define EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ Packet4f p4f_##NAME = reinterpret_cast(vec_splat_s32(X)) -#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ Packet4f p4f_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ +#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ const Packet4f p4f_##NAME = reinterpret_cast(pset1(X)) -static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} -static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} +static EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 
0.0, 0.0, 0.0, 0.0} +static EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} static Packet4f p4f_MZERO = { 0x80000000, 0x80000000, 0x80000000, 0x80000000}; #endif @@ -117,9 +119,9 @@ static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; // Mask alignment -#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 +#define EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 -#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) +#define EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & EIGEN_MASK_ALIGNMENT) // Handle endianness properly while loading constants // Define global static constants: @@ -358,7 +360,7 @@ pbroadcast4(const double *a, template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, Index stride) { - int EIGEN_ALIGN16 ai[4]; + EIGEN_ALIGN16 int ai[4]; ai[0] = from[0*stride]; ai[1] = from[1*stride]; ai[2] = from[2*stride]; @@ -368,7 +370,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* f template<> EIGEN_DEVICE_FUNC inline Packet2d pgather(const double* from, Index stride) { - double EIGEN_ALIGN16 af[2]; + EIGEN_ALIGN16 double af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return pload(af); @@ -376,7 +378,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet2d pgather(const dou template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, Index stride) { - int EIGEN_ALIGN16 ai[4]; + EIGEN_ALIGN16 int ai[4]; pstore((int *)ai, from); to[0*stride] = ai[0]; to[1*stride] = ai[1]; @@ -386,7 +388,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet2d& from, Index stride) { - double EIGEN_ALIGN16 af[2]; + EIGEN_ALIGN16 double af[2]; pstore(af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; @@ -460,8 +462,8 @@ template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); } -template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; } -template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { EIGEN_ALIGN16 int x[4]; pstore(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { EIGEN_ALIGN16 double x[2]; pstore(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { @@ -639,7 +641,7 @@ pbroadcast4(const float *a, template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { - float EIGEN_ALIGN16 ai[4]; + EIGEN_ALIGN16 float ai[4]; ai[0] = from[0*stride]; ai[1] = from[1*stride]; ai[2] = from[2*stride]; @@ -649,7 +651,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const floa template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, Index stride) { - float EIGEN_ALIGN16 ai[4]; + EIGEN_ALIGN16 float ai[4]; pstore((float *)ai, from); to[0*stride] = ai[0]; to[1*stride] = ai[1]; @@ -785,7 +787,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) return p; } -template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; } +template<> 
EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { EIGEN_ALIGN16 float x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; } template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { @@ -943,7 +945,7 @@ pbroadcast4(const float *a, template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { - float EIGEN_ALIGN16 af[4]; + EIGEN_ALIGN16 float af[4]; af[0] = from[0*stride]; af[1] = from[1*stride]; af[2] = from[2*stride]; @@ -953,7 +955,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const floa template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const Packet4f& from, Index stride) { - float EIGEN_ALIGN16 af[4]; + EIGEN_ALIGN16 float af[4]; pstore((float*)af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; @@ -978,7 +980,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pround (const Packet4f& a) { r template<> EIGEN_STRONG_INLINE Packet4f pceil (const Packet4f& a) { return vec_ceil(a); } template<> EIGEN_STRONG_INLINE Packet4f pfloor (const Packet4f& a) { return vec_floor(a); } template<> EIGEN_STRONG_INLINE Packet4f pabs (const Packet4f& a) { return vec_abs(a); } -template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { EIGEN_ALIGN16 float x[4]; pstore(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { diff --git a/libs/eigen/Eigen/src/Core/functors/AssignmentFunctors.h b/libs/eigen/Eigen/src/Core/functors/AssignmentFunctors.h index bf64ef4..c9d80e6 100644 --- a/libs/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/libs/eigen/Eigen/src/Core/functors/AssignmentFunctors.h @@ -10,6 +10,8 @@ #ifndef EIGEN_ASSIGNMENT_FUNCTORS_H #define EIGEN_ASSIGNMENT_FUNCTORS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -20,9 +22,8 @@ namespace internal { */ template struct assign_op { - EIGEN_EMPTY_STRUCT_CTOR(assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; } - + template EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const { internal::pstoret(a,b); } @@ -45,9 +46,8 @@ struct functor_traits > { */ template struct add_assign_op { - EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; } - + template EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } @@ -66,9 +66,8 @@ struct functor_traits > { */ template struct sub_assign_op { - EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; } - + template EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } @@ -88,9 +87,8 @@ struct functor_traits > { template struct mul_assign_op { - EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; } - + template EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const { internal::pstoret(a,internal::pmul(internal::ploadt(a),b)); } @@ -109,9 +107,8 @@ struct functor_traits > { */ template struct div_assign_op { - EIGEN_EMPTY_STRUCT_CTOR(div_assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const 
SrcScalar& b) const { a /= b; } - + template EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const { internal::pstoret(a,internal::pdiv(internal::ploadt(a),b)); } @@ -141,7 +138,6 @@ struct functor_traits > { */ template struct swap_assign_op { - EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { #ifdef EIGEN_GPUCC diff --git a/libs/eigen/Eigen/src/Core/functors/BinaryFunctors.h b/libs/eigen/Eigen/src/Core/functors/BinaryFunctors.h index 63f09ab..c8bb4e7 100644 --- a/libs/eigen/Eigen/src/Core/functors/BinaryFunctors.h +++ b/libs/eigen/Eigen/src/Core/functors/BinaryFunctors.h @@ -10,6 +10,8 @@ #ifndef EIGEN_BINARY_FUNCTORS_H #define EIGEN_BINARY_FUNCTORS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -32,9 +34,7 @@ template struct scalar_sum_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; -#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN - EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) -#else +#ifdef EIGEN_SCALAR_BINARY_OP_PLUGIN scalar_sum_op() { EIGEN_SCALAR_BINARY_OP_PLUGIN } @@ -70,9 +70,7 @@ template struct scalar_product_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; -#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN - EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) -#else +#ifdef EIGEN_SCALAR_BINARY_OP_PLUGIN scalar_product_op() { EIGEN_SCALAR_BINARY_OP_PLUGIN } @@ -110,13 +108,12 @@ struct scalar_conj_product_op : binary_op_base enum { Conj = NumTraits::IsComplex }; - + typedef typename ScalarBinaryOpTraits::ReturnType result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return conj_helper().pmul(a,b); } - + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const { return conj_helper().pmul(a,b); } @@ -138,7 +135,6 @@ template struct scalar_min_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return internal::pmin(a, b); } @@ -171,7 +167,6 @@ template struct scalar_max_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return internal::pmax(a,b); } @@ -205,7 +200,11 @@ template struct functor_traits > { enum { Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, - PacketAccess = false + PacketAccess = is_same::value && + packet_traits::HasCmp && + // Since return type is bool, we currently require the inputs + // to be bool to enable packet access. 
+ is_same::value }; }; @@ -219,50 +218,64 @@ template struct scalar_cmp_op : binary_op_base { typedef bool result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pcmp_eq(a,b); } }; template struct scalar_cmp_op : binary_op_base { typedef bool result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pcmp_lt(a,b); } }; template struct scalar_cmp_op : binary_op_base { typedef bool result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pcmp_le(a,b); } }; template struct scalar_cmp_op : binary_op_base { typedef bool result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pcmp_lt(b,a); } }; template struct scalar_cmp_op : binary_op_base { typedef bool result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pcmp_le(b,a); } }; template struct scalar_cmp_op : binary_op_base { typedef bool result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pcmp_eq(internal::por(internal::pcmp_le(a, b), internal::pcmp_le(b, a)), internal::pzero(a)); } }; template struct scalar_cmp_op : binary_op_base { typedef bool result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pcmp_eq(internal::pcmp_eq(a, b), internal::pzero(a)); } }; /** \internal @@ -273,8 +286,6 @@ struct scalar_cmp_op : binary_op_base struct scalar_hypot_op : binary_op_base { - EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const { // This functor is used by hypotNorm only for which it is faster to first apply abs @@ -304,9 +315,7 @@ template struct scalar_pow_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; -#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN - EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op) -#else +#ifdef EIGEN_SCALAR_BINARY_OP_PLUGIN scalar_pow_op() { typedef Scalar LhsScalar; typedef Exponent RhsScalar; @@ -331,7 +340,7 @@ struct functor_traits > { PacketAccess = 
(!NumTraits::IsComplex && !NumTraits::IsInteger && packet_traits::HasExp && packet_traits::HasLog && packet_traits::HasRound && packet_traits::HasCmp && - // Temporarly disable packet access for half/bfloat16 until + // Temporarily disable packet access for half/bfloat16 until // accuracy is improved. !is_same::value && !is_same::value ) @@ -349,9 +358,7 @@ template struct scalar_difference_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; -#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN - EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) -#else +#ifdef EIGEN_SCALAR_BINARY_OP_PLUGIN scalar_difference_op() { EIGEN_SCALAR_BINARY_OP_PLUGIN } @@ -369,6 +376,28 @@ struct functor_traits > { }; }; +template ::type>::IsInteger> +struct maybe_raise_div_by_zero { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Packet x) { + EIGEN_UNUSED_VARIABLE(x); + } +}; + +#ifndef EIGEN_GPU_COMPILE_PHASE +template +struct maybe_raise_div_by_zero { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Packet x) { + if (EIGEN_PREDICT_FALSE(predux_any(pcmp_eq(x, pzero(x))))) { + // Use volatile variables to force a division by zero, which will + // result in the default platform behaviour (usually SIGFPE). + volatile typename unpacket_traits::type zero = 0; + volatile typename unpacket_traits::type val = 1; + val = val / zero; + } + } +}; +#endif + /** \internal * \brief Template functor to compute the quotient of two scalars * @@ -378,17 +407,17 @@ template struct scalar_quotient_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; -#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN - EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) -#else +#ifdef EIGEN_SCALAR_BINARY_OP_PLUGIN scalar_quotient_op() { EIGEN_SCALAR_BINARY_OP_PLUGIN } #endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pdiv(a,b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { + maybe_raise_div_by_zero::run(b); + return internal::pdiv(a,b); + } }; template struct functor_traits > { @@ -407,7 +436,6 @@ struct functor_traits > { * \sa class CwiseBinaryOp, ArrayBase::operator&& */ struct scalar_boolean_and_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -426,7 +454,6 @@ template<> struct functor_traits { * \sa class CwiseBinaryOp, ArrayBase::operator|| */ struct scalar_boolean_or_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -445,7 +472,6 @@ template<> struct functor_traits { * \sa class CwiseBinaryOp, ArrayBase::operator^ */ struct scalar_boolean_xor_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -467,9 +493,7 @@ template struct scalar_absolute_difference_op : 
binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; -#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN - EIGEN_EMPTY_STRUCT_CTOR(scalar_absolute_difference_op) -#else +#ifdef EIGEN_SCALAR_BINARY_OP_PLUGIN scalar_absolute_difference_op() { EIGEN_SCALAR_BINARY_OP_PLUGIN } @@ -489,6 +513,73 @@ struct functor_traits > { }; +template +struct scalar_atan2_op { + using Scalar = LhsScalar; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t::value, Scalar> + operator()(const Scalar& y, const Scalar& x) const { + EIGEN_USING_STD(atan2); + return static_cast(atan2(y, x)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + std::enable_if_t::value, Packet> + packetOp(const Packet& y, const Packet& x) const { + // See https://en.cppreference.com/w/cpp/numeric/math/atan2 + // for how corner cases are supposed to be handled according to the + // IEEE floating-point standard (IEC 60559). + const Packet kSignMask = pset1(-Scalar(0)); + const Packet kPi = pset1(Scalar(EIGEN_PI)); + const Packet kPiO2 = pset1(Scalar(EIGEN_PI / 2)); + const Packet kPiO4 = pset1(Scalar(EIGEN_PI / 4)); + const Packet k3PiO4 = pset1(Scalar(3.0 * (EIGEN_PI / 4))); + + // Various predicates about the inputs. + Packet x_signbit = pand(x, kSignMask); + Packet x_has_signbit = pcmp_lt(por(x_signbit, kPi), pzero(x)); + Packet x_is_zero = pcmp_eq(x, pzero(x)); + Packet x_neg = pandnot(x_has_signbit, x_is_zero); + + Packet y_signbit = pand(y, kSignMask); + Packet y_is_zero = pcmp_eq(y, pzero(y)); + Packet x_is_not_nan = pcmp_eq(x, x); + Packet y_is_not_nan = pcmp_eq(y, y); + + // Compute the normal case. Notice that we expect that + // finite/infinite = +/-0 here. + Packet result = patan(pdiv(y, x)); + + // Compute shift for when x != 0 and y != 0. + Packet shift = pselect(x_neg, por(kPi, y_signbit), pzero(x)); + + // Special cases: + // Handle x = +/-inf && y = +/-inf. + Packet is_not_nan = pcmp_eq(result, result); + result = + pselect(is_not_nan, padd(shift, result), + pselect(x_neg, por(k3PiO4, y_signbit), por(kPiO4, y_signbit))); + // Handle x == +/-0. + result = pselect( + x_is_zero, pselect(y_is_zero, pzero(y), por(y_signbit, kPiO2)), result); + // Handle y == +/-0. + result = pselect( + y_is_zero, + pselect(x_has_signbit, por(y_signbit, kPi), por(y_signbit, pzero(y))), + result); + // Handle NaN inputs. 
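+    // Per IEC 60559 (see the cppreference link above), atan2 returns NaN only
+    // when an input is NaN; every other combination, including atan2(+/-0, +/-0),
+    // has a defined value and is produced by the selects above, so the final
+    // select below overwrites only the NaN lanes.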
+ Packet kQNaN = pset1(NumTraits::quiet_NaN()); + return pselect(pand(x_is_not_nan, y_is_not_nan), result, kQNaN); + } +}; + +template + struct functor_traits> { + enum { + PacketAccess = is_same::value && packet_traits::HasATan && packet_traits::HasDiv && !NumTraits::IsInteger && !NumTraits::IsComplex, + Cost = + scalar_div_cost::value + 5 * NumTraits::MulCost + 5 * NumTraits::AddCost + }; +}; //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- diff --git a/libs/eigen/Eigen/src/Core/functors/NullaryFunctors.h b/libs/eigen/Eigen/src/Core/functors/NullaryFunctors.h index 192f225..4943d87 100644 --- a/libs/eigen/Eigen/src/Core/functors/NullaryFunctors.h +++ b/libs/eigen/Eigen/src/Core/functors/NullaryFunctors.h @@ -10,6 +10,8 @@ #ifndef EIGEN_NULLARY_FUNCTORS_H #define EIGEN_NULLARY_FUNCTORS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -29,7 +31,6 @@ struct functor_traits > PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; template struct scalar_identity_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); } }; @@ -144,6 +145,39 @@ template struct linspaced_op const linspaced_op_impl::IsInteger> impl; }; +template +struct equalspaced_op { + typedef typename NumTraits::Real RealScalar; + + EIGEN_DEVICE_FUNC equalspaced_op(const Scalar& start, const Scalar& step) : m_start(start), m_step(step) {} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(IndexType i) const { + return m_start + m_step * static_cast(i); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(IndexType i) const { + const Packet cst_start = pset1(m_start); + const Packet cst_step = pset1(m_step); + const Packet cst_lin0 = plset(Scalar(0)); + const Packet cst_offset = pmadd(cst_lin0, cst_step, cst_start); + + Packet i_packet = pset1(static_cast(i)); + return pmadd(i_packet, cst_step, cst_offset); + } + const Scalar m_start; + const Scalar m_step; +}; + +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost + NumTraits::MulCost, + PacketAccess = + packet_traits::HasSetLinear && packet_traits::HasMul && packet_traits::HasAdd, + IsRepeatable = true + }; +}; + // Linear access is automatically determined from the operator() prototypes available for the given functor. // If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently // and linear access is not possible. In all other cases, linear access is enabled. @@ -152,7 +186,7 @@ template struct functor_has_linear_access { enum { ret = !has_ // For unreliable compilers, let's specialize the has_*ary_operator // helpers so that at least built-in nullary functors work fine. -#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600)) +#if !( EIGEN_COMP_MSVC || EIGEN_COMP_GNUC || (EIGEN_COMP_ICC>=1600)) template struct has_nullary_operator,IndexType> { enum { value = 1}; }; template diff --git a/libs/eigen/Eigen/src/Core/functors/StlFunctors.h b/libs/eigen/Eigen/src/Core/functors/StlFunctors.h index 4570c9b..5971075 100644 --- a/libs/eigen/Eigen/src/Core/functors/StlFunctors.h +++ b/libs/eigen/Eigen/src/Core/functors/StlFunctors.h @@ -10,6 +10,8 @@ #ifndef EIGEN_STL_FUNCTORS_H #define EIGEN_STL_FUNCTORS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { // Portable replacements for certain functors. 
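Stepping back to the equalspaced_op functor added to NullaryFunctors.h above: its packetOp folds the per-lane plset ramp into a precomputed offset, so lane j of the packet generated for index i evaluates to start + (i + j) * step. A hypothetical scalar model of that packet path, outside the patch, with N standing in for the packet size:

#include <array>
#include <cstddef>

template <typename Scalar, std::size_t N>
std::array<Scalar, N> equalspaced_packet(Scalar start, Scalar step, std::ptrdiff_t i) {
  // cst_offset = plset(0) * step + start; packetOp then adds i * step on top,
  // so lane j holds start + (i + j) * step.
  std::array<Scalar, N> out{};
  for (std::size_t j = 0; j < N; ++j) {
    out[j] = start + static_cast<Scalar>(i + static_cast<std::ptrdiff_t>(j)) * step;
  }
  return out;
}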
@@ -102,17 +104,6 @@ template struct functor_traits > : functor_traits > {}; -#if (EIGEN_COMP_CXXVER < 11) -// std::binder* are deprecated since c++11 and will be removed in c++17 -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; -#endif - #if (EIGEN_COMP_CXXVER < 17) // std::unary_negate is deprecated since c++17 and will be removed in c++20 template diff --git a/libs/eigen/Eigen/src/Core/functors/TernaryFunctors.h b/libs/eigen/Eigen/src/Core/functors/TernaryFunctors.h index b254e96..41c0d5f 100644 --- a/libs/eigen/Eigen/src/Core/functors/TernaryFunctors.h +++ b/libs/eigen/Eigen/src/Core/functors/TernaryFunctors.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TERNARY_FUNCTORS_H #define EIGEN_TERNARY_FUNCTORS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/functors/UnaryFunctors.h b/libs/eigen/Eigen/src/Core/functors/UnaryFunctors.h index 16136d1..3485369 100644 --- a/libs/eigen/Eigen/src/Core/functors/UnaryFunctors.h +++ b/libs/eigen/Eigen/src/Core/functors/UnaryFunctors.h @@ -10,6 +10,8 @@ #ifndef EIGEN_UNARY_FUNCTORS_H #define EIGEN_UNARY_FUNCTORS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -20,7 +22,6 @@ namespace internal { * \sa class CwiseUnaryOp, MatrixBase::operator- */ template struct scalar_opposite_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const @@ -39,7 +40,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, Cwise::abs */ template struct scalar_abs_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); } template @@ -70,14 +70,12 @@ struct functor_traits > : functor_traits struct abs_knowing_score { - EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score) typedef typename NumTraits::Real result_type; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); } }; template struct abs_knowing_score::Score_is_abs> { - EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score) typedef typename NumTraits::Real result_type; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; } @@ -89,7 +87,6 @@ template struct abs_knowing_score struct scalar_abs2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op) typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } @@ -107,7 +104,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, MatrixBase::conjugate() */ template struct scalar_conjugate_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::conj(a); } template @@ -136,7 +132,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, Cwise::arg */ template struct scalar_arg_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op) typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return 
numext::arg(a); } template @@ -158,7 +153,6 @@ struct functor_traits > */ template struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef NewType result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast(a); } }; @@ -173,7 +167,6 @@ struct functor_traits > */ template struct scalar_shift_right_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_shift_right_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return a >> N; } @@ -192,8 +185,6 @@ struct functor_traits > */ template struct scalar_shift_left_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_shift_left_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return a << N; } template @@ -211,7 +202,6 @@ struct functor_traits > */ template struct scalar_real_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op) typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); } @@ -227,7 +217,6 @@ struct functor_traits > */ template struct scalar_imag_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op) typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); } @@ -243,7 +232,6 @@ struct functor_traits > */ template struct scalar_real_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op) typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast(&a)); } @@ -259,7 +247,6 @@ struct functor_traits > */ template struct scalar_imag_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op) typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast(&a)); } @@ -275,8 +262,7 @@ struct functor_traits > * \sa class CwiseUnaryOp, Cwise::exp() */ template struct scalar_exp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return internal::pexp(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } }; @@ -315,7 +301,6 @@ struct functor_traits > { * \sa class CwiseUnaryOp, ArrayBase::expm1() */ template struct scalar_expm1_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); } @@ -335,7 +320,6 @@ struct functor_traits > { * \sa class CwiseUnaryOp, ArrayBase::log() */ template struct scalar_log_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); } @@ -366,7 +350,6 @@ struct functor_traits > { * \sa class CwiseUnaryOp, ArrayBase::log1p() */ template struct scalar_log1p_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log1p(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); } 
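The scalar_log1p_op/scalar_expm1_op functors kept above exist because computing log(1 + x) or exp(x) - 1 through the naive expressions loses all significance for tiny x. A quick standalone illustration using only the C++ standard library:

#include <cmath>
#include <cstdio>

int main() {
  const double x = 1e-17;
  // 1.0 + x rounds to exactly 1.0 in double precision, so the naive form gives 0.
  std::printf("log(1+x) = %.17g\n", std::log(1.0 + x));
  // std::log1p works on x directly and returns ~1e-17, accurate to full precision.
  std::printf("log1p(x) = %.17g\n", std::log1p(x));
  return 0;
}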
@@ -386,7 +369,6 @@ struct functor_traits > { * \sa class CwiseUnaryOp, Cwise::log10() */ template struct scalar_log10_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD(log10) return log10(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); } @@ -402,7 +384,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, Cwise::log2() */ template struct scalar_log2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log2_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(EIGEN_LOG2E) * numext::log(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog2(a); } @@ -416,7 +397,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, Cwise::sqrt() */ template struct scalar_sqrt_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } @@ -440,7 +420,6 @@ struct functor_traits > { // Boolean specialization to eliminate -Wimplicit-conversion-floating-point-to-bool warnings. template<> struct scalar_sqrt_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline bool operator() (const bool& a) const { return a; } template EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return a; } @@ -455,7 +434,6 @@ struct functor_traits > { * \sa class CwiseUnaryOp, Cwise::rsqrt() */ template struct scalar_rsqrt_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::rsqrt(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } @@ -474,7 +452,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::cos() */ template struct scalar_cos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } @@ -493,7 +470,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::sin() */ template struct scalar_sin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); } @@ -513,7 +489,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::tan() */ template struct scalar_tan_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } @@ -532,7 +507,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::acos() */ template struct scalar_acos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::acos(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } @@ -551,7 +525,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::asin() */ template struct scalar_asin_op { - 
EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } @@ -571,7 +544,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::atan() */ template struct scalar_atan_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); } @@ -591,7 +563,6 @@ struct functor_traits > */ template struct scalar_tanh_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); } @@ -622,14 +593,12 @@ struct functor_traits > { }; }; -#if EIGEN_HAS_CXX11_MATH /** \internal * \brief Template functor to compute the atanh of a scalar * \sa class CwiseUnaryOp, ArrayBase::atanh() */ template struct scalar_atanh_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op) EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::atanh(a); } }; @@ -637,14 +606,12 @@ template struct functor_traits > { enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; -#endif /** \internal * \brief Template functor to compute the sinh of a scalar * \sa class CwiseUnaryOp, ArrayBase::sinh() */ template struct scalar_sinh_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); } @@ -658,14 +625,12 @@ struct functor_traits > }; }; -#if EIGEN_HAS_CXX11_MATH /** \internal * \brief Template functor to compute the asinh of a scalar * \sa class CwiseUnaryOp, ArrayBase::asinh() */ template struct scalar_asinh_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op) EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::asinh(a); } }; @@ -673,14 +638,12 @@ template struct functor_traits > { enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; -#endif /** \internal * \brief Template functor to compute the cosh of a scalar * \sa class CwiseUnaryOp, ArrayBase::cosh() */ template struct scalar_cosh_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); } @@ -694,14 +657,12 @@ struct functor_traits > }; }; -#if EIGEN_HAS_CXX11_MATH /** \internal * \brief Template functor to compute the acosh of a scalar * \sa class CwiseUnaryOp, ArrayBase::acosh() */ template struct scalar_acosh_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op) EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::acosh(a); } }; @@ -709,7 +670,6 @@ template struct functor_traits > { enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; -#endif /** \internal * \brief Template functor to compute the inverse of a scalar @@ -717,17 +677,21 @@ struct functor_traits > { */ template struct scalar_inverse_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return 
Scalar(1)/a; } template EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(Scalar(1)),a); } + { return internal::preciprocal(a); } }; template struct functor_traits > { enum { PacketAccess = packet_traits::HasDiv, - Cost = scalar_div_cost::value + // If packet_traits::HasReciprocal then the Estimated cost is that + // of computing an approximation plus a single Newton-Raphson step, which + // consists of 1 pmul + 1 pmadd. + Cost = (packet_traits::HasReciprocal + ? 4 * NumTraits::MulCost + : scalar_div_cost::value) }; }; @@ -737,7 +701,6 @@ struct functor_traits > { */ template struct scalar_square_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; } template EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const @@ -750,7 +713,6 @@ struct functor_traits > // Boolean specialization to avoid -Wint-in-bool-context warnings on GCC. template<> struct scalar_square_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline bool operator() (const bool& a) const { return a; } template EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const @@ -766,7 +728,6 @@ struct functor_traits > */ template struct scalar_cube_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; } template EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const @@ -779,7 +740,6 @@ struct functor_traits > // Boolean specialization to avoid -Wint-in-bool-context warnings on GCC. template<> struct scalar_cube_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline bool operator() (const bool& a) const { return a; } template EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const @@ -794,7 +754,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::round() */ template struct scalar_round_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); } @@ -813,7 +772,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::floor() */ template struct scalar_floor_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); } @@ -832,7 +790,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::rint() */ template struct scalar_rint_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::rint(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::print(a); } @@ -851,7 +808,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::ceil() */ template struct scalar_ceil_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } @@ -870,7 +826,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, 
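// Editor's note on the scalar_inverse_op change above: a scalar model of the new
// cost comment (a sketch, not Eigen's kernel). One Newton-Raphson refinement of a
// reciprocal estimate y ~ 1/a computes y' = y*(2 - a*y), i.e. one multiply-add plus
// one multiply -- hence the ~4*MulCost charged when HasReciprocal is set.
//
//   inline float refine_reciprocal(float a, float y) {
//     const float e = 2.0f - a * y;  // the pmadd-shaped step
//     return y * e;                  // the pmul
//   }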
ArrayBase::isnan() */ template struct scalar_isnan_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op) typedef bool result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { #if defined(SYCL_DEVICE_ONLY) @@ -894,7 +849,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::isinf() */ template struct scalar_isinf_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op) typedef bool result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { #if defined(SYCL_DEVICE_ONLY) @@ -918,7 +872,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::isfinite() */ template struct scalar_isfinite_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op) typedef bool result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { #if defined(SYCL_DEVICE_ONLY) @@ -943,7 +896,6 @@ struct functor_traits > * \sa class CwiseUnaryOp, ArrayBase::operator! */ template struct scalar_boolean_not_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; } }; template @@ -958,47 +910,19 @@ struct functor_traits > { * \brief Template functor to compute the signum of a scalar * \sa class CwiseUnaryOp, Cwise::sign() */ -template::IsComplex!=0), bool is_integer=(NumTraits::IsInteger!=0) > struct scalar_sign_op; template -struct scalar_sign_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) +struct scalar_sign_op { EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - return Scalar( (a>Scalar(0)) - (a + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { + return internal::psign(a); } - //TODO - //template - //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } }; -template -struct scalar_sign_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const - { - return (numext::isnan)(a) ? a : Scalar( (a>Scalar(0)) - (a - //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } -}; - -template -struct scalar_sign_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const - { - typedef typename NumTraits::Real real_type; - real_type aa = numext::abs(a); - if (aa==real_type(0)) - return Scalar(0); - aa = real_type(1)/aa; - return Scalar(a.real()*aa, a.imag()*aa ); - } - //TODO - //template - //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } -}; template struct functor_traits > { enum { @@ -1006,7 +930,7 @@ struct functor_traits > NumTraits::IsComplex ? 
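// Editor's note on the scalar_sign_op rewrite above: the three specializations are
// folded into one and the packet path is finally enabled via internal::psign. For
// real, non-integer scalars, the removed specialization computed, in effect (a
// plain-code restatement of the deleted lines; NaN is propagated):
//
//   #include <cmath>
//   inline float sign_model(float a) {
//     if (std::isnan(a)) return a;
//     return static_cast<float>((a > 0.0f) - (a < 0.0f));
//   }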
( 8*NumTraits::MulCost ) // roughly : ( 3*NumTraits::AddCost), - PacketAccess = packet_traits::HasSign + PacketAccess = packet_traits::HasSign && packet_traits::Vectorizable }; }; @@ -1016,7 +940,6 @@ struct functor_traits > */ template struct scalar_logistic_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { return packetOp(x); } @@ -1024,87 +947,104 @@ struct scalar_logistic_op { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { const Packet one = pset1(T(1)); - return pdiv(one, padd(one, pexp(pnegate(x)))); + const Packet inf = pset1(NumTraits::infinity()); + const Packet e = pexp(x); + const Packet inf_mask = pcmp_eq(e, inf); + return pselect(inf_mask, one, pdiv(e, padd(one, e))); } }; -#ifndef EIGEN_GPU_COMPILE_PHASE +// TODO(rmlarsen): Enable the following on host when integer_packet is defined +// for the relevant packet types. +#ifdef EIGEN_GPU_CC + /** \internal * \brief Template specialization of the logistic function for float. - * - * Uses just a 9/10-degree rational interpolant which - * interpolates 1/(1+exp(-x)) - 0.5 up to a couple of ulps in the range - * [-9, 18]. Below -9 we use the more accurate approximation - * 1/(1+exp(-x)) ~= exp(x), and above 18 the logistic function is 1 withing - * one ulp. The shifted logistic is interpolated because it was easier to - * make the fit converge. - * + * Computes S(x) = exp(x) / (1 + exp(x)), where exp(x) is implemented + * using an algorithm partly adopted from the implementation of + * pexp_float. See the individual steps described in the code below. + * Note that compared to pexp, we use an additional outer multiplicative + * range reduction step using the identity exp(x) = exp(x/2)^2. + * This prevents us from having to call ldexp on values that could produce + * a denormal result, which allows us to call the faster implementation in + * pldexp_fast_impl::run(p, m). + * The final squaring, however, doubles the error bound on the final + * approximation. Exhaustive testing shows that we have a worst case error + * of 4.5 ulps (compared to computing S(x) in double precision), which is + * acceptable. */ template <> struct scalar_logistic_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator()(const float& x) const { - return packetOp(x); + // Truncate at the first point where the interpolant is exactly one. + const float cst_exp_hi = 16.6355324f; + const float e = numext::exp(numext::mini(x, cst_exp_hi)); + return e / (1.0f + e); } - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Packet packetOp(const Packet& _x) const { - const Packet cutoff_lower = pset1(-9.f); - const Packet lt_mask = pcmp_lt(_x, cutoff_lower); - const bool any_small = predux_any(lt_mask); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet + packetOp(const Packet& _x) const { + const Packet cst_zero = pset1(0.0f); + const Packet cst_one = pset1(1.0f); + const Packet cst_half = pset1(0.5f); + // Truncate at the first point where the interpolant is exactly one. + const Packet cst_exp_hi = pset1(16.6355324f); + const Packet cst_exp_lo = pset1(-104.f); - // The upper cut-off is the smallest x for which the rational approximation evaluates to 1. - // Choosing this value saves us a few instructions clamping the results at the end.
-#ifdef EIGEN_VECTORIZE_FMA - const Packet cutoff_upper = pset1(15.7243833541870117f); -#else - const Packet cutoff_upper = pset1(15.6437711715698242f); -#endif - const Packet x = pmin(_x, cutoff_upper); + // Clamp x to the non-trivial range of S(x). Outside this + // interval the correctly rounded value of S(x) is either zero + // or one. + Packet zero_mask = pcmp_lt(_x, cst_exp_lo); + Packet x = pmin(_x, cst_exp_hi); - // The monomial coefficients of the numerator polynomial (odd). - const Packet alpha_1 = pset1(2.48287947061529e-01f); - const Packet alpha_3 = pset1(8.51377133304701e-03f); - const Packet alpha_5 = pset1(6.08574864600143e-05f); - const Packet alpha_7 = pset1(1.15627324459942e-07f); - const Packet alpha_9 = pset1(4.37031012579801e-11f); + // 1. Multiplicative range reduction: + // Reduce the range of x by a factor of 2. This avoids having + // to compute exp(x) accurately where the result is a denormalized + // value. + x = pmul(x, cst_half); - // The monomial coefficients of the denominator polynomial (even). - const Packet beta_0 = pset1(9.93151921023180e-01f); - const Packet beta_2 = pset1(1.16817656904453e-01f); - const Packet beta_4 = pset1(1.70198817374094e-03f); - const Packet beta_6 = pset1(6.29106785017040e-06f); - const Packet beta_8 = pset1(5.76102136993427e-09f); - const Packet beta_10 = pset1(6.10247389755681e-13f); + // 2. Subtractive range reduction: + // Express exp(x) as exp(m*ln(2) + r) = 2^m*exp(r); start by extracting + // m = floor(x/ln(2) + 0.5), such that x = m*ln(2) + r. + const Packet cst_cephes_LOG2EF = pset1(1.44269504088896341f); + Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half)); + // Get r = x - m*ln(2). We use a trick from Cephes where the term + // m*ln(2) is subtracted out in two parts, m*C1+m*C2 = m*ln(2), + // to avoid accumulating truncation errors. + const Packet cst_cephes_exp_C1 = pset1(-0.693359375f); + const Packet cst_cephes_exp_C2 = pset1(2.12194440e-4f); + Packet r = pmadd(m, cst_cephes_exp_C1, x); + r = pmadd(m, cst_cephes_exp_C2, r); - // Since the polynomials are odd/even, we need x^2. - const Packet x2 = pmul(x, x); + // 3. Compute an approximation to exp(r) using a degree 5 minimax polynomial. + // We compute even and odd terms separately to increase instruction level + // parallelism. + Packet r2 = pmul(r, r); + const Packet cst_p2 = pset1(0.49999141693115234375f); + const Packet cst_p3 = pset1(0.16666877269744873046875f); + const Packet cst_p4 = pset1(4.1898667812347412109375e-2f); + const Packet cst_p5 = pset1(8.33471305668354034423828125e-3f); - // Evaluate the numerator polynomial p. - Packet p = pmadd(x2, alpha_9, alpha_7); - p = pmadd(x2, p, alpha_5); - p = pmadd(x2, p, alpha_3); - p = pmadd(x2, p, alpha_1); - p = pmul(x, p); + const Packet p_even = pmadd(r2, cst_p4, cst_p2); + const Packet p_odd = pmadd(r2, cst_p5, cst_p3); + const Packet p_low = padd(r, cst_one); + Packet p = pmadd(r, p_odd, p_even); + p = pmadd(r2, p, p_low); - // Evaluate the denominator polynomial q. - Packet q = pmadd(x2, beta_10, beta_8); - q = pmadd(x2, q, beta_6); - q = pmadd(x2, q, beta_4); - q = pmadd(x2, q, beta_2); - q = pmadd(x2, q, beta_0); - // Divide the numerator by the denominator and shift it up. - const Packet logistic = padd(pdiv(p, q), pset1(0.5f)); - if (EIGEN_PREDICT_FALSE(any_small)) { - const Packet exponential = pexp(_x); - return pselect(lt_mask, exponential, logistic); - } else { - return logistic; - } + // 4. Undo subtractive range reduction exp(m*ln(2) + r) = 2^m * exp(r).
+ Packet e = pldexp_fast_impl::run(p, m); + + // 5. Undo multiplicative range reduction by using exp(r) = exp(r/2)^2. + e = pmul(e, e); + + // Return exp(x) / (1 + exp(x)) + return pselect(zero_mask, cst_zero, pdiv(e, padd(cst_one, e))); } }; #endif // #ifndef EIGEN_GPU_COMPILE_PHASE + template struct functor_traits > { enum { @@ -1124,6 +1064,97 @@ struct functor_traits > { }; }; +template ::IsInteger, + bool IsExponentInteger = NumTraits::IsInteger, + bool IsBaseComplex = NumTraits::IsComplex, + bool IsExponentComplex = NumTraits::IsComplex> +struct scalar_unary_pow_op { + typedef typename internal::promote_scalar_arg< + Scalar, ExponentScalar, + internal::has_ReturnType >::value>::type PromotedExponent; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_unary_pow_op(const ExponentScalar& exponent) : m_exponent(exponent) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator()(const Scalar& a) const { + EIGEN_USING_STD(pow); + return static_cast(pow(a, m_exponent)); + } + + private: + const ExponentScalar m_exponent; + scalar_unary_pow_op() {} +}; + +template +constexpr int exponent_digits() { + return CHAR_BIT * sizeof(T) - NumTraits::digits() - NumTraits::IsSigned; +} + +template +struct is_floating_exactly_representable { + // TODO(rmlarsen): Add radix to NumTraits and enable this check. + // (NumTraits::radix == NumTraits::radix) && + static constexpr bool value = (exponent_digits() >= exponent_digits() && + NumTraits::digits() >= NumTraits::digits()); +}; + + +// Specialization for real, non-integer types, non-complex types. +template +struct scalar_unary_pow_op { + template ::value> + std::enable_if_t check_is_representable() const {} + + // Issue a deprecation warning if we do a narrowing conversion on the exponent. + template ::value> + EIGEN_DEPRECATED std::enable_if_t check_is_representable() const {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + scalar_unary_pow_op(const ExponentScalar& exponent) : m_exponent(static_cast(exponent)) { + check_is_representable(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& a) const { + EIGEN_USING_STD(pow); + return static_cast(pow(a, m_exponent)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { + return unary_pow_impl::run(a, m_exponent); + } + + private: + const Scalar m_exponent; + scalar_unary_pow_op() {} +}; + +template +struct scalar_unary_pow_op { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_unary_pow_op(const ExponentScalar& exponent) : m_exponent(exponent) {} + // TODO: error handling logic for complex^real_integer + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& a) const { + return unary_pow_impl::run(a, m_exponent); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { + return unary_pow_impl::run(a, m_exponent); + } + + private: + const ExponentScalar m_exponent; + scalar_unary_pow_op() {} +}; + +template +struct functor_traits> { + enum { + GenPacketAccess = functor_traits>::PacketAccess, + IntPacketAccess = !NumTraits::IsComplex && packet_traits::HasMul && (packet_traits::HasDiv || NumTraits::IsInteger) && packet_traits::HasCmp, + PacketAccess = NumTraits::IsInteger ? 
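// Editor's note, pausing on the logistic kernel above: its five steps transcribe
// to plain scalar code with the same constants. A sketch (our transcription; the
// shipped kernel works on packets and uses pldexp_fast_impl to undo step 2):
//
//   #include <algorithm>
//   #include <cmath>
//
//   float logistic_sketch(float x) {
//     if (x < -104.0f) return 0.0f;              // below cst_exp_lo: rounds to 0
//     x = 0.5f * std::min(x, 16.6355324f);       // clamp + multiplicative reduction
//     // Subtractive range reduction: x = m*ln(2) + r.
//     float m = std::floor(x * 1.44269504088896341f + 0.5f);
//     float r = x - m * 0.693359375f;            // m*ln(2) subtracted in two parts
//     r += m * 2.12194440e-4f;
//     // Degree-5 minimax polynomial for exp(r), even/odd terms split.
//     float r2 = r * r;
//     float p_even = 0.49999141693115234375f + r2 * 4.1898667812347412109375e-2f;
//     float p_odd = 0.16666877269744873046875f + r2 * 8.33471305668354034423828125e-3f;
//     float p = (1.0f + r) + r2 * (p_even + r * p_odd);
//     float e = std::ldexp(p, static_cast<int>(m)); // undo subtractive reduction
//     e *= e;                                       // undo exp(x) = exp(x/2)^2
//     return e / (1.0f + e);                        // S(x) = e/(1+e)
//   }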
IntPacketAccess : (IntPacketAccess && GenPacketAccess), + Cost = functor_traits>::Cost + }; +}; + } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/libs/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h index f35b760..4a6cef5 100644 --- a/libs/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/libs/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -11,6 +11,8 @@ #define EIGEN_GENERAL_BLOCK_PANEL_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -21,7 +23,7 @@ enum GEBPPacketSizeType { GEBPPacketQuarter }; -template +template class gebp_traits; @@ -55,8 +57,13 @@ const std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(256*10 const std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(2*1024*1024); #elif EIGEN_ARCH_PPC const std::ptrdiff_t defaultL1CacheSize = EIGEN_SET_DEFAULT_L1_CACHE_SIZE(64*1024); +#ifdef _ARCH_PWR10 +const std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(2*1024*1024); +const std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(8*1024*1024); +#else const std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(512*1024); const std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(4*1024*1024); +#endif #else const std::ptrdiff_t defaultL1CacheSize = EIGEN_SET_DEFAULT_L1_CACHE_SIZE(16*1024); const std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(512*1024); @@ -352,9 +359,9 @@ inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_ template struct RhsPanelHelper { private: - static const int remaining_registers = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS - registers_taken; + static constexpr int remaining_registers = (std::max)(int(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS) - registers_taken, 0); public: - typedef typename conditional=4, RhsPacketx4, RhsPacket>::type type; + typedef std::conditional_t=4, RhsPacketx4, RhsPacket> type; }; template @@ -376,12 +383,12 @@ struct packet_conditional { typedef T1 type; }; template struct packet_conditional { typedef T2 type; }; -#define PACKET_DECL_COND_PREFIX(prefix, name, packet_size) \ +#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size) \ typedef typename packet_conditional::type, \ typename packet_traits::half, \ typename unpacket_traits::half>::half>::type \ - prefix ## name ## Packet + name ## Packet ## postfix #define PACKET_DECL_COND(name, packet_size) \ typedef typename packet_conditional { typedef T2 type; }; typename unpacket_traits::half>::half>::type \ name ## Packet -#define PACKET_DECL_COND_SCALAR_PREFIX(prefix, packet_size) \ +#define PACKET_DECL_COND_SCALAR_POSTFIX(postfix, packet_size) \ typedef typename packet_conditional::type, \ typename packet_traits::half, \ typename unpacket_traits::half>::half>::type \ - prefix ## ScalarPacket + ScalarPacket ## postfix #define PACKET_DECL_COND_SCALAR(packet_size) \ typedef typename packet_conditional { typedef T2 type; }; * cplx*real : unpack rhs to constant packets, ... 
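// Editor's note, backing up to the unary pow functor above: the exactness check
// behind its deprecation warning can be modeled without Eigen's NumTraits. A sketch
// (names are ours; std::numeric_limits stands in for NumTraits, and Eigen's TODO
// anticipates an additional radix comparison):
//
//   #include <climits>
//   #include <limits>
//
//   template <typename T>
//   constexpr int exponent_digits_model() {
//     return CHAR_BIT * sizeof(T) - std::numeric_limits<T>::digits
//            - (std::numeric_limits<T>::is_signed ? 1 : 0);
//   }
//
//   template <typename To, typename From>
//   constexpr bool exactly_representable() {
//     return exponent_digits_model<To>() >= exponent_digits_model<From>() &&
//            std::numeric_limits<To>::digits >= std::numeric_limits<From>::digits;
//   }
//
//   // Casting a float exponent to double is exact; the reverse may narrow, which
//   // is what selects the EIGEN_DEPRECATED check_is_representable() overload.
//   static_assert(exactly_representable<double, float>(), "widening is exact");
//   static_assert(!exactly_representable<float, double>(), "narrowing can lose bits");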
* real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual */ -template +template class gebp_traits { public: - typedef _LhsScalar LhsScalar; - typedef _RhsScalar RhsScalar; + typedef LhsScalar_ LhsScalar; + typedef RhsScalar_ RhsScalar; typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; - PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Res, _PacketSize); + PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_); enum { - ConjLhs = _ConjLhs, - ConjRhs = _ConjRhs, - Vectorizable = unpacket_traits<_LhsPacket>::vectorizable && unpacket_traits<_RhsPacket>::vectorizable, - LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1, - RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1, - ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1, + ConjLhs = ConjLhs_, + ConjRhs = ConjRhs_, + Vectorizable = unpacket_traits::vectorizable && unpacket_traits::vectorizable, + LhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + RhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + ResPacketSize = Vectorizable ? unpacket_traits::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, @@ -440,7 +447,7 @@ public: nr = 4, // register block size along the M direction (currently, this one cannot be modified) - default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, + default_mr = (plain_enum_min(16, NumberOfRegisters)/2/nr)*LhsPacketSize, #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) \ && ((!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1914)) // we assume 16 registers or more @@ -457,9 +464,9 @@ public: }; - typedef typename conditional::type LhsPacket; - typedef typename conditional::type RhsPacket; - typedef typename conditional::type ResPacket; + typedef std::conditional_t LhsPacket; + typedef std::conditional_t RhsPacket; + typedef std::conditional_t ResPacket; typedef LhsPacket LhsPacket4Packing; typedef QuadPacket RhsPacketx4; @@ -543,25 +550,25 @@ public: }; -template -class gebp_traits, RealScalar, _ConjLhs, false, Arch, _PacketSize> +template +class gebp_traits, RealScalar, ConjLhs_, false, Arch, PacketSize_> { public: typedef std::complex LhsScalar; typedef RealScalar RhsScalar; typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; - PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Res, _PacketSize); + PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_); enum { - ConjLhs = _ConjLhs, + ConjLhs = ConjLhs_, ConjRhs = false, - Vectorizable = unpacket_traits<_LhsPacket>::vectorizable && unpacket_traits<_RhsPacket>::vectorizable, - LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1, - RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1, - ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1, + Vectorizable = unpacket_traits::vectorizable && unpacket_traits::vectorizable, + LhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + RhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + ResPacketSize = Vectorizable ? 
unpacket_traits::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = 4, @@ -569,16 +576,16 @@ public: // we assume 16 registers mr = 3*LhsPacketSize, #else - mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, + mr = (plain_enum_min(16, NumberOfRegisters)/2/nr)*LhsPacketSize, #endif LhsProgress = LhsPacketSize, RhsProgress = 1 }; - typedef typename conditional::type LhsPacket; - typedef typename conditional::type RhsPacket; - typedef typename conditional::type ResPacket; + typedef std::conditional_t LhsPacket; + typedef std::conditional_t RhsPacket; + typedef std::conditional_t ResPacket; typedef LhsPacket LhsPacket4Packing; typedef QuadPacket RhsPacketx4; @@ -612,7 +619,7 @@ public: EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { - loadRhsQuad_impl(b,dest, typename conditional::type()); + loadRhsQuad_impl(b,dest, std::conditional_t()); } EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const true_type&) const @@ -643,7 +650,7 @@ public: template EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp, const LaneIdType&) const { - madd_impl(a, b, c, tmp, typename conditional::type()); + madd_impl(a, b, c, tmp, std::conditional_t()); } template @@ -701,7 +708,7 @@ DoublePacket padd(const DoublePacket &a, const DoublePacket const DoublePacket& predux_half_dowto4(const DoublePacket &a, - typename enable_if::size<=8>::type* = 0) + std::enable_if_t::size<=8>* = 0) { return a; } @@ -709,7 +716,7 @@ predux_half_dowto4(const DoublePacket &a, template DoublePacket::half> predux_half_dowto4(const DoublePacket &a, - typename enable_if::size==16>::type* = 0) + std::enable_if_t::size==16>* = 0) { // yes, that's pretty hackish :( DoublePacket::half> res; @@ -723,7 +730,7 @@ predux_half_dowto4(const DoublePacket &a, // same here, "quad" actually means "8" in terms of real coefficients template void loadQuadToDoublePacket(const Scalar* b, DoublePacket& dest, - typename enable_if::size<=8>::type* = 0) + std::enable_if_t::size<=8>* = 0) { dest.first = pset1(numext::real(*b)); dest.second = pset1(numext::imag(*b)); @@ -731,7 +738,7 @@ void loadQuadToDoublePacket(const Scalar* b, DoublePacket& dest, template void loadQuadToDoublePacket(const Scalar* b, DoublePacket& dest, - typename enable_if::size==16>::type* = 0) + std::enable_if_t::size==16>* = 0) { // yes, that's pretty hackish too :( typedef typename NumTraits::Real RealScalar; @@ -744,6 +751,9 @@ void loadQuadToDoublePacket(const Scalar* b, DoublePacket& dest, template struct unpacket_traits > { typedef DoublePacket::half> half; + enum{ + size = 2 * unpacket_traits::size + }; }; // template // DoublePacket pmadd(const DoublePacket &a, const DoublePacket &b) @@ -754,8 +764,8 @@ template struct unpacket_traits > { // return res; // } -template -class gebp_traits, std::complex, _ConjLhs, _ConjRhs, Arch, _PacketSize > +template +class gebp_traits, std::complex, ConjLhs_, ConjRhs_, Arch, PacketSize_ > { public: typedef std::complex Scalar; @@ -763,19 +773,19 @@ public: typedef std::complex RhsScalar; typedef std::complex ResScalar; - PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Res, _PacketSize); - PACKET_DECL_COND(Real, _PacketSize); - PACKET_DECL_COND_SCALAR(_PacketSize); + PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_); + 
PACKET_DECL_COND(Real, PacketSize_); + PACKET_DECL_COND_SCALAR(PacketSize_); enum { - ConjLhs = _ConjLhs, - ConjRhs = _ConjRhs, + ConjLhs = ConjLhs_, + ConjRhs = ConjRhs_, Vectorizable = unpacket_traits::vectorizable && unpacket_traits::vectorizable, - ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1, - LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1, + ResPacketSize = Vectorizable ? unpacket_traits::size : 1, + LhsPacketSize = Vectorizable ? unpacket_traits::size : 1, RhsPacketSize = Vectorizable ? unpacket_traits::size : 1, RealPacketSize = Vectorizable ? unpacket_traits::size : 1, @@ -789,13 +799,13 @@ public: typedef DoublePacket DoublePacketType; - typedef typename conditional::type LhsPacket4Packing; - typedef typename conditional::type LhsPacket; - typedef typename conditional::type RhsPacket; - typedef typename conditional::type ResPacket; - typedef typename conditional::type AccPacket; + typedef std::conditional_t LhsPacket4Packing; + typedef std::conditional_t LhsPacket; + typedef std::conditional_t RhsPacket; + typedef std::conditional_t ResPacket; + typedef std::conditional_t AccPacket; - // this actualy holds 8 packets! + // this actually holds 8 packets! typedef QuadPacket RhsPacketx4; EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); } @@ -866,7 +876,7 @@ public: template EIGEN_STRONG_INLINE - typename enable_if::value>::type + std::enable_if_t::value> madd(const LhsPacketType& a, const RhsPacketType& b, DoublePacket& c, TmpType& /*tmp*/, const LaneIdType&) const { c.first = padd(pmul(a,b.first), c.first); @@ -920,8 +930,8 @@ protected: conj_helper cj; }; -template -class gebp_traits, false, _ConjRhs, Arch, _PacketSize > +template +class gebp_traits, false, ConjRhs_, Arch, PacketSize_ > { public: typedef std::complex Scalar; @@ -929,38 +939,38 @@ public: typedef Scalar RhsScalar; typedef Scalar ResScalar; - PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Res, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Real, _PacketSize); - PACKET_DECL_COND_SCALAR_PREFIX(_, _PacketSize); + PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Real, PacketSize_); + PACKET_DECL_COND_SCALAR_POSTFIX(_, PacketSize_); -#undef PACKET_DECL_COND_SCALAR_PREFIX -#undef PACKET_DECL_COND_PREFIX +#undef PACKET_DECL_COND_SCALAR_POSTFIX +#undef PACKET_DECL_COND_POSTFIX #undef PACKET_DECL_COND_SCALAR #undef PACKET_DECL_COND enum { ConjLhs = false, - ConjRhs = _ConjRhs, - Vectorizable = unpacket_traits<_RealPacket>::vectorizable - && unpacket_traits<_ScalarPacket>::vectorizable, - LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1, - RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1, - ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1, + ConjRhs = ConjRhs_, + Vectorizable = unpacket_traits::vectorizable + && unpacket_traits::vectorizable, + LhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + RhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + ResPacketSize = Vectorizable ? 
unpacket_traits::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, // FIXME: should depend on NumberOfRegisters nr = 4, - mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize, + mr = (plain_enum_min(16, NumberOfRegisters)/2/nr)*ResPacketSize, LhsProgress = ResPacketSize, RhsProgress = 1 }; - typedef typename conditional::type LhsPacket; - typedef typename conditional::type RhsPacket; - typedef typename conditional::type ResPacket; + typedef std::conditional_t LhsPacket; + typedef std::conditional_t RhsPacket; + typedef std::conditional_t ResPacket; typedef LhsPacket LhsPacket4Packing; typedef QuadPacket RhsPacketx4; typedef ResPacket AccPacket; @@ -1009,7 +1019,7 @@ public: template EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp, const LaneIdType&) const { - madd_impl(a, b, c, tmp, typename conditional::type()); + madd_impl(a, b, c, tmp, std::conditional_t()); } template @@ -1068,6 +1078,7 @@ struct gebp_kernel typedef typename Traits::RhsPacketx4 RhsPacketx4; typedef typename RhsPanelHelper::type RhsPanel15; + typedef typename RhsPanelHelper::type RhsPanel27; typedef gebp_traits SwappedTraits; @@ -1201,7 +1212,7 @@ struct lhs_process_one_packet traits.madd(*A0, *rhs_panel, *C1, *T0, fix<1>); traits.madd(*A0, *rhs_panel, *C2, *T0, fix<2>); traits.madd(*A0, *rhs_panel, *C3, *T0, fix<3>); - #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) + #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) && !(EIGEN_COMP_LCC) __asm__ ("" : "+x,m" (*A0)); #endif EIGEN_ASM_COMMENT("end step of gebp micro kernel 1X4"); @@ -1213,13 +1224,140 @@ struct lhs_process_one_packet int prefetch_res_offset, Index peeled_kc, Index pk, Index cols, Index depth, Index packet_cols4) { GEBPTraits traits; - + Index packet_cols8 = nr>=8 ? 
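// Editor's note on the RhsPanel27 typedef above: it feeds RhsPanelHelper with
// registers_taken = 27, since the new 3-packet-by-8-column kernel pins 24
// accumulators plus 3 lhs packets. A standalone model of the selection rule
// (names are ours; the clamped subtraction mirrors the diff's std::max):
//
//   #include <algorithm>
//
//   template <int TotalRegisters, int RegistersTaken>
//   struct rhs_panel_model {
//     static constexpr int remaining =
//         (std::max)(TotalRegisters - RegistersTaken, 0);
//     static constexpr bool use_quad_panel = remaining >= 4;  // RhsPacketx4 vs RhsPacket
//   };
//
//   // 32 NEON registers leave 32 - 27 = 5 free, enough for the quad rhs panel:
//   static_assert(rhs_panel_model<32, 27>::use_quad_panel, "RhsPacketx4 selected");
//   // 16 registers with 15 taken fall back to a single RhsPacket:
//   static_assert(!rhs_panel_model<16, 15>::use_quad_panel, "RhsPacket selected");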
(cols/8) * 8 : 0; // loops on each largest micro horizontal panel of lhs // (LhsProgress x depth) for(Index i=peelStart; i=8) { + for(Index j2=0; j2); \ + traits.updateRhs(&blB[(1 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C1, T0, fix<1>); \ + traits.updateRhs(&blB[(2 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C2, T0, fix<2>); \ + traits.updateRhs(&blB[(3 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C3, T0, fix<3>); \ + traits.loadRhs(&blB[(4 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C4, T0, fix<0>); \ + traits.updateRhs(&blB[(5 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C5, T0, fix<1>); \ + traits.updateRhs(&blB[(6 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C6, T0, fix<2>); \ + traits.updateRhs(&blB[(7 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C7, T0, fix<3>); \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX8"); \ + } while (false) + + EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX8"); + + EIGEN_GEBGP_ONESTEP(0); + EIGEN_GEBGP_ONESTEP(1); + EIGEN_GEBGP_ONESTEP(2); + EIGEN_GEBGP_ONESTEP(3); + EIGEN_GEBGP_ONESTEP(4); + EIGEN_GEBGP_ONESTEP(5); + EIGEN_GEBGP_ONESTEP(6); + EIGEN_GEBGP_ONESTEP(7); + + blB += pk*8*RhsProgress; + blA += pk*(1*LhsProgress); + + EIGEN_ASM_COMMENT("end gebp micro kernel 1pX8"); + } + // process remaining peeled loop + for(Index k=peeled_kc; k(alpha); + + R0 = r0.template loadPacket(0); + R1 = r1.template loadPacket(0); + traits.acc(C0, alphav, R0); + traits.acc(C1, alphav, R1); + r0.storePacket(0, R0); + r1.storePacket(0, R1); + + R0 = r2.template loadPacket(0); + R1 = r3.template loadPacket(0); + traits.acc(C2, alphav, R0); + traits.acc(C3, alphav, R1); + r2.storePacket(0, R0); + r3.storePacket(0, R1); + + R0 = r4.template loadPacket(0); + R1 = r5.template loadPacket(0); + traits.acc(C4, alphav, R0); + traits.acc(C5, alphav, R1); + r4.storePacket(0, R0); + r5.storePacket(0, R1); + + R0 = r6.template loadPacket(0); + R1 = r7.template loadPacket(0); + traits.acc(C6, alphav, R0); + traits.acc(C7, alphav, R1); + r6.storePacket(0, R0); + r7.storePacket(0, R1); + } + } +#endif + // loops on each largest micro vertical panel of rhs (depth * nr) - for(Index j2=0; j2 cj; Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0; + Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0; const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0; const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0; const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? 
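// Editor's note: the 1pX8 micro kernel above, boiled down to scalars (a sketch
// assuming Packet = float, RhsProgress = 1, no conjugation). Eight accumulators
// stay live in registers across the whole depth loop, like C0..C7 above.
//
//   void gebp_micro_1x8(const float* blA, const float* blB, long depth,
//                       float alpha, float* res, long resStride) {
//     float C[8] = {0.0f};
//     for (long k = 0; k < depth; ++k) {
//       const float A0 = blA[k];          // traits.loadLhs
//       for (int j = 0; j < 8; ++j) {
//         C[j] += A0 * blB[8 * k + j];    // loadRhs/updateRhs + madd, lanes fix<0..3>
//       }
//     }
//     for (int j = 0; j < 8; ++j) {
//       res[j * resStride] += alpha * C[j];  // traits.acc: R += alpha * C
//     }
//   }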
peeled_mc2+((rows-peeled_mc2)/(1*LhsProgress))*(1*LhsProgress) : 0; @@ -1441,7 +1580,220 @@ void gebp_kernel=8) { + for(Index j2=0; j2); \ + traits.madd(A1, rhs_panel, C8, T0, fix<0>); \ + traits.madd(A2, rhs_panel, C16, T0, fix<0>); \ + traits.updateRhs(blB + (1 + 8 * K) * Traits::RhsProgress, rhs_panel); \ + traits.madd(A0, rhs_panel, C1, T0, fix<1>); \ + traits.madd(A1, rhs_panel, C9, T0, fix<1>); \ + traits.madd(A2, rhs_panel, C17, T0, fix<1>); \ + traits.updateRhs(blB + (2 + 8 * K) * Traits::RhsProgress, rhs_panel); \ + traits.madd(A0, rhs_panel, C2, T0, fix<2>); \ + traits.madd(A1, rhs_panel, C10, T0, fix<2>); \ + traits.madd(A2, rhs_panel, C18, T0, fix<2>); \ + traits.updateRhs(blB + (3 + 8 * K) * Traits::RhsProgress, rhs_panel); \ + traits.madd(A0, rhs_panel, C3, T0, fix<3>); \ + traits.madd(A1, rhs_panel, C11, T0, fix<3>); \ + traits.madd(A2, rhs_panel, C19, T0, fix<3>); \ + traits.loadRhs(blB + (4 + 8 * K) * Traits::RhsProgress, rhs_panel); \ + traits.madd(A0, rhs_panel, C4, T0, fix<0>); \ + traits.madd(A1, rhs_panel, C12, T0, fix<0>); \ + traits.madd(A2, rhs_panel, C20, T0, fix<0>); \ + traits.updateRhs(blB + (5 + 8 * K) * Traits::RhsProgress, rhs_panel); \ + traits.madd(A0, rhs_panel, C5, T0, fix<1>); \ + traits.madd(A1, rhs_panel, C13, T0, fix<1>); \ + traits.madd(A2, rhs_panel, C21, T0, fix<1>); \ + traits.updateRhs(blB + (6 + 8 * K) * Traits::RhsProgress, rhs_panel); \ + traits.madd(A0, rhs_panel, C6, T0, fix<2>); \ + traits.madd(A1, rhs_panel, C14, T0, fix<2>); \ + traits.madd(A2, rhs_panel, C22, T0, fix<2>); \ + traits.updateRhs(blB + (7 + 8 * K) * Traits::RhsProgress, rhs_panel); \ + traits.madd(A0, rhs_panel, C7, T0, fix<3>); \ + traits.madd(A1, rhs_panel, C15, T0, fix<3>); \ + traits.madd(A2, rhs_panel, C23, T0, fix<3>); \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX8"); \ + } while (false) + + EIGEN_GEBP_ONESTEP(0); + EIGEN_GEBP_ONESTEP(1); + EIGEN_GEBP_ONESTEP(2); + EIGEN_GEBP_ONESTEP(3); + EIGEN_GEBP_ONESTEP(4); + EIGEN_GEBP_ONESTEP(5); + EIGEN_GEBP_ONESTEP(6); + EIGEN_GEBP_ONESTEP(7); + + blB += pk * 8 * RhsProgress; + blA += pk * 3 * Traits::LhsProgress; + EIGEN_ASM_COMMENT("end gebp micro kernel 3pX8"); + } + + // process remaining peeled loop + for (Index k = peeled_kc; k < depth; k++) + { + + RhsPanel27 rhs_panel; + RhsPacket T0; + LhsPacket A2; + EIGEN_GEBP_ONESTEP(0); + blB += 8 * RhsProgress; + blA += 3 * Traits::LhsProgress; + } + + #undef EIGEN_GEBP_ONESTEP + + ResPacket R0, R1, R2; + ResPacket alphav = pset1(alpha); + + R0 = r0.template loadPacket(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket(1 * Traits::ResPacketSize); + R2 = r0.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C0, alphav, R0); + traits.acc(C8, alphav, R1); + traits.acc(C16, alphav, R2); + r0.storePacket(0 * Traits::ResPacketSize, R0); + r0.storePacket(1 * Traits::ResPacketSize, R1); + r0.storePacket(2 * Traits::ResPacketSize, R2); + + R0 = r1.template loadPacket(0 * Traits::ResPacketSize); + R1 = r1.template loadPacket(1 * Traits::ResPacketSize); + R2 = r1.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C1, alphav, R0); + traits.acc(C9, alphav, R1); + traits.acc(C17, alphav, R2); + r1.storePacket(0 * Traits::ResPacketSize, R0); + r1.storePacket(1 * Traits::ResPacketSize, R1); + r1.storePacket(2 * Traits::ResPacketSize, R2); + + R0 = r2.template loadPacket(0 * Traits::ResPacketSize); + R1 = r2.template loadPacket(1 * Traits::ResPacketSize); + R2 = r2.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C2, alphav, R0); + 
traits.acc(C10, alphav, R1); + traits.acc(C18, alphav, R2); + r2.storePacket(0 * Traits::ResPacketSize, R0); + r2.storePacket(1 * Traits::ResPacketSize, R1); + r2.storePacket(2 * Traits::ResPacketSize, R2); + + R0 = r3.template loadPacket(0 * Traits::ResPacketSize); + R1 = r3.template loadPacket(1 * Traits::ResPacketSize); + R2 = r3.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C3, alphav, R0); + traits.acc(C11, alphav, R1); + traits.acc(C19, alphav, R2); + r3.storePacket(0 * Traits::ResPacketSize, R0); + r3.storePacket(1 * Traits::ResPacketSize, R1); + r3.storePacket(2 * Traits::ResPacketSize, R2); + + R0 = r4.template loadPacket(0 * Traits::ResPacketSize); + R1 = r4.template loadPacket(1 * Traits::ResPacketSize); + R2 = r4.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C4, alphav, R0); + traits.acc(C12, alphav, R1); + traits.acc(C20, alphav, R2); + r4.storePacket(0 * Traits::ResPacketSize, R0); + r4.storePacket(1 * Traits::ResPacketSize, R1); + r4.storePacket(2 * Traits::ResPacketSize, R2); + + R0 = r5.template loadPacket(0 * Traits::ResPacketSize); + R1 = r5.template loadPacket(1 * Traits::ResPacketSize); + R2 = r5.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C5, alphav, R0); + traits.acc(C13, alphav, R1); + traits.acc(C21, alphav, R2); + r5.storePacket(0 * Traits::ResPacketSize, R0); + r5.storePacket(1 * Traits::ResPacketSize, R1); + r5.storePacket(2 * Traits::ResPacketSize, R2); + + R0 = r6.template loadPacket(0 * Traits::ResPacketSize); + R1 = r6.template loadPacket(1 * Traits::ResPacketSize); + R2 = r6.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C6, alphav, R0); + traits.acc(C14, alphav, R1); + traits.acc(C22, alphav, R2); + r6.storePacket(0 * Traits::ResPacketSize, R0); + r6.storePacket(1 * Traits::ResPacketSize, R1); + r6.storePacket(2 * Traits::ResPacketSize, R2); + + R0 = r7.template loadPacket(0 * Traits::ResPacketSize); + R1 = r7.template loadPacket(1 * Traits::ResPacketSize); + R2 = r7.template loadPacket(2 * Traits::ResPacketSize); + traits.acc(C7, alphav, R0); + traits.acc(C15, alphav, R1); + traits.acc(C23, alphav, R2); + r7.storePacket(0 * Traits::ResPacketSize, R0); + r7.storePacket(1 * Traits::ResPacketSize, R1); + r7.storePacket(2 * Traits::ResPacketSize, R2); + } + } + } +#endif + for(Index j2=packet_cols8; j2=8) { + for(Index j2=0; j2=6 without FMA (bug 1637) + #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) + #define EIGEN_GEBP_2Px8_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1)); + #else + #define EIGEN_GEBP_2Px8_SPILLING_WORKAROUND + #endif +#define EIGEN_GEBGP_ONESTEP(K) \ + do { \ + EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX8"); \ + traits.loadLhs(&blA[(0 + 2 * K) * LhsProgress], A0); \ + traits.loadLhs(&blA[(1 + 2 * K) * LhsProgress], A1); \ + traits.loadRhs(&blB[(0 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C0, T0, fix<0>); \ + traits.madd(A1, rhs_panel, C8, T0, fix<0>); \ + traits.updateRhs(&blB[(1 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C1, T0, fix<1>); \ + traits.madd(A1, rhs_panel, C9, T0, fix<1>); \ + traits.updateRhs(&blB[(2 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C2, T0, fix<2>); \ + traits.madd(A1, rhs_panel, C10, T0, fix<2>); \ + traits.updateRhs(&blB[(3 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C3, T0, fix<3>); \ + traits.madd(A1, rhs_panel, C11, T0, fix<3>); \ + traits.loadRhs(&blB[(4 + 8 * K) * RhsProgress], rhs_panel); \ + 
traits.madd(A0, rhs_panel, C4, T0, fix<0>); \ + traits.madd(A1, rhs_panel, C12, T0, fix<0>); \ + traits.updateRhs(&blB[(5 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C5, T0, fix<1>); \ + traits.madd(A1, rhs_panel, C13, T0, fix<1>); \ + traits.updateRhs(&blB[(6 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C6, T0, fix<2>); \ + traits.madd(A1, rhs_panel, C14, T0, fix<2>); \ + traits.updateRhs(&blB[(7 + 8 * K) * RhsProgress], rhs_panel); \ + traits.madd(A0, rhs_panel, C7, T0, fix<3>); \ + traits.madd(A1, rhs_panel, C15, T0, fix<3>); \ + EIGEN_GEBP_2Px8_SPILLING_WORKAROUND \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX8"); \ + } while (false) + + EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX8"); + + EIGEN_GEBGP_ONESTEP(0); + EIGEN_GEBGP_ONESTEP(1); + EIGEN_GEBGP_ONESTEP(2); + EIGEN_GEBGP_ONESTEP(3); + EIGEN_GEBGP_ONESTEP(4); + EIGEN_GEBGP_ONESTEP(5); + EIGEN_GEBGP_ONESTEP(6); + EIGEN_GEBGP_ONESTEP(7); + + blB += pk*8*RhsProgress; + blA += pk*(2*Traits::LhsProgress); + + EIGEN_ASM_COMMENT("end gebp micro kernel 2pX8"); + } + // process remaining peeled loop + for(Index k=peeled_kc; k(alpha); + + R0 = r0.template loadPacket(0 * Traits::ResPacketSize); + R1 = r0.template loadPacket(1 * Traits::ResPacketSize); + R2 = r1.template loadPacket(0 * Traits::ResPacketSize); + R3 = r1.template loadPacket(1 * Traits::ResPacketSize); + traits.acc(C0, alphav, R0); + traits.acc(C8, alphav, R1); + traits.acc(C1, alphav, R2); + traits.acc(C9, alphav, R3); + r0.storePacket(0 * Traits::ResPacketSize, R0); + r0.storePacket(1 * Traits::ResPacketSize, R1); + r1.storePacket(0 * Traits::ResPacketSize, R2); + r1.storePacket(1 * Traits::ResPacketSize, R3); + + R0 = r2.template loadPacket(0 * Traits::ResPacketSize); + R1 = r2.template loadPacket(1 * Traits::ResPacketSize); + R2 = r3.template loadPacket(0 * Traits::ResPacketSize); + R3 = r3.template loadPacket(1 * Traits::ResPacketSize); + traits.acc(C2, alphav, R0); + traits.acc(C10, alphav, R1); + traits.acc(C3, alphav, R2); + traits.acc(C11, alphav, R3); + r2.storePacket(0 * Traits::ResPacketSize, R0); + r2.storePacket(1 * Traits::ResPacketSize, R1); + r3.storePacket(0 * Traits::ResPacketSize, R2); + r3.storePacket(1 * Traits::ResPacketSize, R3); + + R0 = r4.template loadPacket(0 * Traits::ResPacketSize); + R1 = r4.template loadPacket(1 * Traits::ResPacketSize); + R2 = r5.template loadPacket(0 * Traits::ResPacketSize); + R3 = r5.template loadPacket(1 * Traits::ResPacketSize); + traits.acc(C4, alphav, R0); + traits.acc(C12, alphav, R1); + traits.acc(C5, alphav, R2); + traits.acc(C13, alphav, R3); + r4.storePacket(0 * Traits::ResPacketSize, R0); + r4.storePacket(1 * Traits::ResPacketSize, R1); + r5.storePacket(0 * Traits::ResPacketSize, R2); + r5.storePacket(1 * Traits::ResPacketSize, R3); + + R0 = r6.template loadPacket(0 * Traits::ResPacketSize); + R1 = r6.template loadPacket(1 * Traits::ResPacketSize); + R2 = r7.template loadPacket(0 * Traits::ResPacketSize); + R3 = r7.template loadPacket(1 * Traits::ResPacketSize); + traits.acc(C6, alphav, R0); + traits.acc(C14, alphav, R1); + traits.acc(C7, alphav, R2); + traits.acc(C15, alphav, R3); + r6.storePacket(0 * Traits::ResPacketSize, R0); + r6.storePacket(1 * Traits::ResPacketSize, R1); + r7.storePacket(0 * Traits::ResPacketSize, R2); + r7.storePacket(1 * Traits::ResPacketSize, R3); + } + } + } +#endif + for(Index j2=packet_cols8; j2=6 without FMA (bug 1637) - #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) + #if EIGEN_GNUC_AT_LEAST(6,0) && 
defined(EIGEN_VECTORIZE_SSE) && !(EIGEN_COMP_LCC) #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1)); #else #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND @@ -1904,22 +2422,84 @@ void gebp_kernel=8) { // loop on each panel of the rhs - for(Index j2=0; j2::half>::size; const int SResPacketQuarterSize = unpacket_traits::half>::half>::size; - if ((SwappedTraits::LhsProgress % 4) == 0 && + // The following code assumes we can load SRhsPacket in such a way that + // it multiplies blocks of 4 elements in SLhsPacket. This is not the + // case for some customized kernels (i.e. NEON fp16). If the assumption + // fails, drop down to the scalar path. + constexpr bool kCanLoadSRhsQuad = (unpacket_traits::size < 4) || (unpacket_traits::size % (unpacket_traits::size / 4)) == 0; + if (kCanLoadSRhsQuad && + (SwappedTraits::LhsProgress % 4) == 0 && (SwappedTraits::LhsProgress<=16) && (SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr) && (SwappedTraits::LhsProgress!=16 || SResPacketQuarterSize==nr)) @@ -1974,10 +2554,10 @@ void gebp_kernel=8,typename unpacket_traits::half,SResPacket>::type SResPacketHalf; - typedef typename conditional=8,typename unpacket_traits::half,SLhsPacket>::type SLhsPacketHalf; - typedef typename conditional=8,typename unpacket_traits::half,SRhsPacket>::type SRhsPacketHalf; - typedef typename conditional=8,typename unpacket_traits::half,SAccPacket>::type SAccPacketHalf; + typedef std::conditional_t=8,typename unpacket_traits::half,SResPacket> SResPacketHalf; + typedef std::conditional_t=8,typename unpacket_traits::half,SLhsPacket> SLhsPacketHalf; + typedef std::conditional_t=8,typename unpacket_traits::half,SRhsPacket> SRhsPacketHalf; + typedef std::conditional_t=8,typename unpacket_traits::half,SAccPacket> SAccPacketHalf; SResPacketHalf R = res.template gatherPacket(i, j2); SResPacketHalf alphav = pset1(alpha); @@ -2269,8 +2849,8 @@ EIGEN_DONT_INLINE void gemm_pack_lhs0) { Index remaining_rows = rows-i; @@ -2290,21 +2870,21 @@ EIGEN_DONT_INLINE void gemm_pack_lhs kernel; - for (int p = 0; p < psize; ++p) kernel.packet[p] = lhs.template loadPacket(i+p+m, k); + for (Index p = 0; p < psize; ++p) kernel.packet[p] = lhs.template loadPacket(i+p+m, k); ptranspose(kernel); - for (int p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p])); + for (Index p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p])); } else if (HasHalf && psize == HalfPacketSize) { gone_half = true; PacketBlock kernel_half; - for (int p = 0; p < psize; ++p) kernel_half.packet[p] = lhs.template loadPacket(i+p+m, k); + for (Index p = 0; p < psize; ++p) kernel_half.packet[p] = lhs.template loadPacket(i+p+m, k); ptranspose(kernel_half); - for (int p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel_half.packet[p])); + for (Index p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel_half.packet[p])); } else if (HasQuarter && psize == QuarterPacketSize) { gone_quarter = true; PacketBlock kernel_quarter; - for (int p = 0; p < psize; ++p) kernel_quarter.packet[p] = lhs.template loadPacket(i+p+m, k); + for (Index p = 0; p < psize; ++p) kernel_quarter.packet[p] = lhs.template loadPacket(i+p+m, k); ptranspose(kernel_quarter); - for (int p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel_quarter.packet[p])); + for (Index p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel_quarter.packet[p])); } } count += psize*pack; @@ -2395,53 +2975,125 @@ 
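// Editor's aside on the EIGEN_GEBP_*_SPILLING_WORKAROUND macros above, now skipped
// for LCC: each expands to an empty asm that acts as an optimizer barrier, keeping
// GCC >= 6 from spilling the accumulators (bug 1637). Minimal standalone form (our
// demo, x86 GCC/Clang only; "+x,m" allows an SSE register or a memory operand):
//
//   inline float spilling_barrier_demo(float a) {
//     __asm__("" : "+x,m"(a));  // emits no instructions, only constrains the optimizer
//     return a;
//   }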
EIGEN_DONT_INLINE void gemm_pack_rhs=4 ? (cols/4) * 4 : 0; Index count = 0; const Index peeled_k = (depth/PacketSize)*PacketSize; -// if(nr>=8) -// { -// for(Index j2=0; j2 kernel; -// for (int p = 0; p < PacketSize; ++p) { -// kernel.packet[p] = ploadu(&rhs[(j2+p)*rhsStride+k]); -// } -// ptranspose(kernel); -// for (int p = 0; p < PacketSize; ++p) { -// pstoreu(blockB+count, cj.pconj(kernel.packet[p])); -// count+=PacketSize; -// } -// } -// } -// for(; k=4) +#if EIGEN_ARCH_ARM64 + EIGEN_IF_CONSTEXPR(nr>=8) + { + for(Index j2=0; j2 kernel0, kernel1, kernel2, kernel3; + kernel0.packet[0%PacketSize] = dm0.template loadPacket(k); + kernel0.packet[1%PacketSize] = dm1.template loadPacket(k); + kernel1.packet[0%PacketSize] = dm2.template loadPacket(k); + kernel1.packet[1%PacketSize] = dm3.template loadPacket(k); + kernel2.packet[0%PacketSize] = dm4.template loadPacket(k); + kernel2.packet[1%PacketSize] = dm5.template loadPacket(k); + kernel3.packet[0%PacketSize] = dm6.template loadPacket(k); + kernel3.packet[1%PacketSize] = dm7.template loadPacket(k); + ptranspose(kernel0); + ptranspose(kernel1); + ptranspose(kernel2); + ptranspose(kernel3); + + pstoreu(blockB + count + 0 * PacketSize, cj.pconj(kernel0.packet[0 % PacketSize])); + pstoreu(blockB + count + 1 * PacketSize, cj.pconj(kernel1.packet[0 % PacketSize])); + pstoreu(blockB + count + 2 * PacketSize, cj.pconj(kernel2.packet[0 % PacketSize])); + pstoreu(blockB + count + 3 * PacketSize, cj.pconj(kernel3.packet[0 % PacketSize])); + + pstoreu(blockB + count + 4 * PacketSize, cj.pconj(kernel0.packet[1 % PacketSize])); + pstoreu(blockB + count + 5 * PacketSize, cj.pconj(kernel1.packet[1 % PacketSize])); + pstoreu(blockB + count + 6 * PacketSize, cj.pconj(kernel2.packet[1 % PacketSize])); + pstoreu(blockB + count + 7 * PacketSize, cj.pconj(kernel3.packet[1 % PacketSize])); + count+=8*PacketSize; + } + else if (PacketSize == 4) + { + PacketBlock kernel0, kernel1; + + kernel0.packet[0%PacketSize] = dm0.template loadPacket(k); + kernel0.packet[1%PacketSize] = dm1.template loadPacket(k); + kernel0.packet[2%PacketSize] = dm2.template loadPacket(k); + kernel0.packet[3%PacketSize] = dm3.template loadPacket(k); + kernel1.packet[0%PacketSize] = dm4.template loadPacket(k); + kernel1.packet[1%PacketSize] = dm5.template loadPacket(k); + kernel1.packet[2%PacketSize] = dm6.template loadPacket(k); + kernel1.packet[3%PacketSize] = dm7.template loadPacket(k); + ptranspose(kernel0); + ptranspose(kernel1); + + pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel0.packet[0%PacketSize])); + pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel1.packet[0%PacketSize])); + pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel0.packet[1%PacketSize])); + pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel1.packet[1%PacketSize])); + pstoreu(blockB+count+4*PacketSize, cj.pconj(kernel0.packet[2%PacketSize])); + pstoreu(blockB+count+5*PacketSize, cj.pconj(kernel1.packet[2%PacketSize])); + pstoreu(blockB+count+6*PacketSize, cj.pconj(kernel0.packet[3%PacketSize])); + pstoreu(blockB+count+7*PacketSize, cj.pconj(kernel1.packet[3%PacketSize])); + count+=8*PacketSize; + } + else if (PacketSize == 8) + { + PacketBlock kernel0; + + kernel0.packet[0%PacketSize] = dm0.template loadPacket(k); + kernel0.packet[1%PacketSize] = dm1.template loadPacket(k); + kernel0.packet[2%PacketSize] = dm2.template loadPacket(k); + kernel0.packet[3%PacketSize] = dm3.template loadPacket(k); + kernel0.packet[4%PacketSize] = dm4.template loadPacket(k); + kernel0.packet[5%PacketSize] = dm5.template loadPacket(k); + 
kernel0.packet[6%PacketSize] = dm6.template loadPacket(k); + kernel0.packet[7%PacketSize] = dm7.template loadPacket(k); + ptranspose(kernel0); + + pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel0.packet[0%PacketSize])); + pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel0.packet[1%PacketSize])); + pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel0.packet[2%PacketSize])); + pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel0.packet[3%PacketSize])); + pstoreu(blockB+count+4*PacketSize, cj.pconj(kernel0.packet[4%PacketSize])); + pstoreu(blockB+count+5*PacketSize, cj.pconj(kernel0.packet[5%PacketSize])); + pstoreu(blockB+count+6*PacketSize, cj.pconj(kernel0.packet[6%PacketSize])); + pstoreu(blockB+count+7*PacketSize, cj.pconj(kernel0.packet[7%PacketSize])); + count+=8*PacketSize; + } + } + } + + for(; k=4) { for(Index j2=packet_cols8; j2=4 ? (cols/4) * 4 : 0; Index count = 0; - // if(nr>=8) - // { - // for(Index j2=0; j2(&rhs[k*rhsStride + j2]); - // pstoreu(blockB+count, cj.pconj(A)); - // } else if (PacketSize==4) { - // Packet A = ploadu(&rhs[k*rhsStride + j2]); - // Packet B = ploadu(&rhs[k*rhsStride + j2 + PacketSize]); - // pstoreu(blockB+count, cj.pconj(A)); - // pstoreu(blockB+count+PacketSize, cj.pconj(B)); - // } else { - // const Scalar* b0 = &rhs[k*rhsStride + j2]; - // blockB[count+0] = cj(b0[0]); - // blockB[count+1] = cj(b0[1]); - // blockB[count+2] = cj(b0[2]); - // blockB[count+3] = cj(b0[3]); - // blockB[count+4] = cj(b0[4]); - // blockB[count+5] = cj(b0[5]); - // blockB[count+6] = cj(b0[6]); - // blockB[count+7] = cj(b0[7]); - // } - // count += 8; - // } - // // skip what we have after - // if(PanelMode) count += 8 * (stride-offset-depth); - // } - // } +#if EIGEN_ARCH_ARM64 + EIGEN_IF_CONSTEXPR(nr>=8) + { + for(Index j2=0; j2(k, j2); + pstoreu(blockB+count, cj.pconj(A)); + count += PacketSize; + } else if (PacketSize==4) { + Packet A = rhs.template loadPacket(k, j2); + Packet B = rhs.template loadPacket(k, j2 + 4); + pstoreu(blockB+count, cj.pconj(A)); + pstoreu(blockB+count+PacketSize, cj.pconj(B)); + count += 2*PacketSize; + } else { + const LinearMapper dm0 = rhs.getLinearMapper(k, j2); + blockB[count+0] = cj(dm0(0)); + blockB[count+1] = cj(dm0(1)); + blockB[count+2] = cj(dm0(2)); + blockB[count+3] = cj(dm0(3)); + blockB[count+4] = cj(dm0(4)); + blockB[count+5] = cj(dm0(5)); + blockB[count+6] = cj(dm0(6)); + blockB[count+7] = cj(dm0(7)); + count += 8; + } + } + // skip what we have after + if(PanelMode) count += 8 * (stride-offset-depth); + } + } +#endif + if(nr>=4) { for(Index j2=packet_cols8; j2 class level3_blocking; +template class level3_blocking; /* Specialization for a row-major destination matrix => simple transposition of the product */ template< @@ -148,9 +150,6 @@ static void run(Index rows, Index cols, Index depth, // Release all the sub blocks A'_i of A' for the current thread, // i.e., we simply decrement the number of users by 1 for(Index i=0; i class gemm_blocking_space; -template +template class level3_blocking { - typedef _LhsScalar LhsScalar; - typedef _RhsScalar RhsScalar; + typedef LhsScalar_ LhsScalar; + typedef RhsScalar_ RhsScalar; protected: LhsScalar* m_blockA; @@ -275,20 +274,19 @@ class level3_blocking inline RhsScalar* blockB() { return m_blockB; } }; -template -class gemm_blocking_space +template +class gemm_blocking_space : public level3_blocking< - typename conditional::type, - typename conditional::type> + std::conditional_t, + std::conditional_t> { enum { Transpose = StorageOrder==RowMajor, ActualRows = Transpose ? 
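// Editor's note: both nr==8 packing branches above -- the ptranspose kernels and
// the direct packet loads -- produce the same blockB layout. A scalar sketch of
// that layout (our code; column-major rhs with leading dimension rhsStride,
// conjugation via cj() omitted):
//
//   void pack_rhs_panel_8(const float* rhs, long rhsStride, long depth,
//                         long j2, float* blockB) {
//     long count = 0;
//     for (long k = 0; k < depth; ++k) {
//       for (int j = 0; j < 8; ++j) {
//         // the 8 coefficients of row k, columns j2..j2+7, become contiguous,
//         // which is what the 8-column micro kernels stream linearly
//         blockB[count++] = rhs[(j2 + j) * rhsStride + k];
//       }
//     }
//   }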
MaxCols : MaxRows, ActualCols = Transpose ? MaxRows : MaxCols }; - typedef typename conditional::type LhsScalar; - typedef typename conditional::type RhsScalar; - typedef gebp_traits Traits; + typedef std::conditional_t LhsScalar; + typedef std::conditional_t RhsScalar; enum { SizeA = ActualRows * MaxDepth, SizeB = ActualCols * MaxDepth @@ -326,18 +324,17 @@ class gemm_blocking_space -class gemm_blocking_space +template +class gemm_blocking_space : public level3_blocking< - typename conditional::type, - typename conditional::type> + std::conditional_t, + std::conditional_t> { enum { Transpose = StorageOrder==RowMajor }; - typedef typename conditional::type LhsScalar; - typedef typename conditional::type RhsScalar; - typedef gebp_traits Traits; + typedef std::conditional_t LhsScalar; + typedef std::conditional_t RhsScalar; Index m_sizeA; Index m_sizeB; @@ -416,14 +413,14 @@ struct generic_product_impl typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type ActualLhsTypeCleaned; + typedef internal::remove_all_t ActualLhsTypeCleaned; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type ActualRhsTypeCleaned; + typedef internal::remove_all_t ActualRhsTypeCleaned; enum { - MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime) + MaxDepthAtCompileTime = min_size_prefer_fixed(Lhs::MaxColsAtCompileTime, Rhs::MaxRowsAtCompileTime) }; typedef generic_product_impl lazyproduct; @@ -486,8 +483,8 @@ struct generic_product_impl ::scaleAndAddTo(dst_vec, a_lhs.row(0), a_rhs, alpha); } - typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); - typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); + add_const_on_value_type_t lhs = LhsBlasTraits::extract(a_lhs); + add_const_on_value_type_t rhs = RhsBlasTraits::extract(a_rhs); Scalar actualAlpha = combine_scalar_factors(alpha, a_lhs, a_rhs); diff --git a/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 6ba0d9b..716f2ca 100644 --- a/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -10,6 +10,8 @@ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H +#include "../InternalHeaderCheck.h" + namespace Eigen { template @@ -142,7 +144,7 @@ struct tribb_kernel typedef typename Traits::ResScalar ResScalar; enum { - BlockSize = meta_least_common_multiple::ret + BlockSize = meta_least_common_multiple::ret }; void operator()(ResScalar* _res, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) { @@ -208,17 +210,17 @@ struct general_product_to_triangular_selector { typedef typename MatrixType::Scalar Scalar; - typedef typename internal::remove_all::type Lhs; + typedef internal::remove_all_t Lhs; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; - typedef typename internal::remove_all::type _ActualLhs; - typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + typedef internal::remove_all_t ActualLhs_; + internal::add_const_on_value_type_t actualLhs = 
LhsBlasTraits::extract(prod.lhs()); - typedef typename internal::remove_all::type Rhs; + typedef internal::remove_all_t Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; - typedef typename internal::remove_all::type _ActualRhs; - typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + typedef internal::remove_all_t ActualRhs_; + internal::add_const_on_value_type_t actualRhs = RhsBlasTraits::extract(prod.rhs()); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); @@ -227,19 +229,19 @@ struct general_product_to_triangular_selector enum { StorageOrder = (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, - UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, - UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 + UseLhsDirectly = ActualLhs_::InnerStrideAtCompileTime==1, + UseRhsDirectly = ActualRhs_::InnerStrideAtCompileTime==1 }; internal::gemv_static_vector_if static_lhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), (UseLhsDirectly ? const_cast(actualLhs.data()) : static_lhs.data())); - if(!UseLhsDirectly) Map(actualLhsPtr, actualLhs.size()) = actualLhs; + if(!UseLhsDirectly) Map(actualLhsPtr, actualLhs.size()) = actualLhs; internal::gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), (UseRhsDirectly ? const_cast(actualRhs.data()) : static_rhs.data())); - if(!UseRhsDirectly) Map(actualRhsPtr, actualRhs.size()) = actualRhs; + if(!UseRhsDirectly) Map(actualRhsPtr, actualRhs.size()) = actualRhs; selfadjoint_rank1_update { static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) { - typedef typename internal::remove_all::type Lhs; + typedef internal::remove_all_t Lhs; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; - typedef typename internal::remove_all::type _ActualLhs; - typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + typedef internal::remove_all_t ActualLhs_; + internal::add_const_on_value_type_t actualLhs = LhsBlasTraits::extract(prod.lhs()); - typedef typename internal::remove_all::type Rhs; + typedef internal::remove_all_t Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; - typedef typename internal::remove_all::type _ActualRhs; - typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + typedef internal::remove_all_t ActualRhs_; + internal::add_const_on_value_type_t actualRhs = RhsBlasTraits::extract(prod.rhs()); typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); @@ -273,8 +275,8 @@ struct general_product_to_triangular_selector enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0, - LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + LhsIsRowMajor = ActualLhs_::Flags&RowMajorBit ? 1 : 0, + RhsIsRowMajor = ActualRhs_::Flags&RowMajorBit ? 
1 : 0, SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; @@ -284,7 +286,7 @@ struct general_product_to_triangular_selector Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space BlockingType; + MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, ActualRhs_::MaxColsAtCompileTime> BlockingType; BlockingType blocking(size, size, depth, 1, false); diff --git a/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 9a650ec..45ad5da 100644 --- a/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -33,6 +33,8 @@ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h index 71abf40..490fe67 100644 --- a/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +++ b/libs/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h @@ -33,6 +33,8 @@ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H #define EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector.h index dfb6aeb..7307994 100644 --- a/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -10,6 +10,8 @@ #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H #define EIGEN_GENERAL_MATRIX_VECTOR_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -29,42 +31,42 @@ struct gemv_packet_cond { typedef T1 type; }; template struct gemv_packet_cond { typedef T2 type; }; -template +template class gemv_traits { typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; -#define PACKET_DECL_COND_PREFIX(prefix, name, packet_size) \ +#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size) \ typedef typename gemv_packet_cond::type, \ typename packet_traits::half, \ typename unpacket_traits::half>::half>::type \ - prefix ## name ## Packet + name ## Packet ## postfix - PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize); - PACKET_DECL_COND_PREFIX(_, Res, _PacketSize); -#undef PACKET_DECL_COND_PREFIX + PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_); + PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_); +#undef PACKET_DECL_COND_POSTFIX public: enum { - Vectorizable = unpacket_traits<_LhsPacket>::vectorizable && - unpacket_traits<_RhsPacket>::vectorizable && - int(unpacket_traits<_LhsPacket>::size)==int(unpacket_traits<_RhsPacket>::size), - LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1, - RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1, - ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1 + Vectorizable = unpacket_traits::vectorizable && + unpacket_traits::vectorizable && + int(unpacket_traits::size)==int(unpacket_traits::size), + LhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + RhsPacketSize = Vectorizable ? unpacket_traits::size : 1, + ResPacketSize = Vectorizable ? 
unpacket_traits<ResPacket_>::size : 1 }; - typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket; - typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket; - typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket; + typedef std::conditional_t<Vectorizable, LhsPacket_, LhsScalar> LhsPacket; + typedef std::conditional_t<Vectorizable, RhsPacket_, RhsScalar> RhsPacket; + typedef std::conditional_t<Vectorizable, ResPacket_, ResScalar> ResPacket; }; /* Optimized col-major matrix * vector product: * This algorithm processes the matrix per vertical panels, - * which are then processed horizontaly per chunck of 8*PacketSize x 1 vertical segments. + * which are then processed horizontally per chunk of 8*PacketSize x 1 vertical segments. * * Mixing type logic: C += alpha * A * B * | A | B |alpha| comments diff --git a/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h b/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h index 6e36c2b..f77e2e4 100644 --- a/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +++ b/libs/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h @@ -33,6 +33,8 @@ #ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H #define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/products/Parallelizer.h b/libs/eigen/Eigen/src/Core/products/Parallelizer.h index 8f91879..da4affb 100644 --- a/libs/eigen/Eigen/src/Core/products/Parallelizer.h +++ b/libs/eigen/Eigen/src/Core/products/Parallelizer.h @@ -10,9 +10,7 @@ #ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H -#if EIGEN_HAS_CXX11_ATOMIC -#include <atomic> -#endif +#include "../InternalHeaderCheck.h" namespace Eigen { @@ -78,18 +76,13 @@ namespace internal { template<typename Index> struct GemmParallelInfo { - GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {} - // volatile is not enough on all architectures (see bug 1572) - // to guarantee that when thread A says to thread B that it is - // done with packing a block, then all writes have been really - // carried out... C++11 memory model+atomic guarantees this. -#if EIGEN_HAS_CXX11_ATOMIC +#ifdef EIGEN_HAS_OPENMP + GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {} std::atomic<Index> sync; std::atomic<int> users; #else - Index volatile sync; - int volatile users; + GemmParallelInfo() : lhs_start(0), lhs_length(0) {} #endif Index lhs_start; @@ -104,7 +97,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, // Without C++11, we have to disable GEMM's parallelization on // non x86 architectures because there volatile is not enough for our purpose. // See bug 1572. -#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64)) +#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) // FIXME the transpose variable is only needed to properly split // the matrix product when multithreading is enabled. This is a temporary // fix to support row-major destination matrices.
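The GemmParallelInfo hunk above drops the pre-C++11 fallback: as the removed comment explains, volatile cannot guarantee that a thread announcing "block packed" has also made the block's contents visible to other threads, whereas the C++11 memory model with std::atomic does. A minimal sketch of that handshake, with hypothetical names rather than Eigen's actual GEMM loop:

#include <atomic>
#include <vector>

// Hypothetical model of the publish/consume protocol behind the atomic
// members of GemmParallelInfo (illustration only).
struct PanelInfo {
  std::atomic<int> sync{-1};   // index of the last fully packed block
  std::vector<double> blockA;  // packed block shared between threads
};

// Packing thread: write the block first, then publish its index.
void pack_block(PanelInfo& info, int k) {
  info.blockA.assign(1024, double(k)); // all stores to blockA happen before...
  info.sync.store(k);                  // ...this store (seq_cst implies release)
}

// Consumer thread: spin until block k is published, then read it safely.
double consume_block(PanelInfo& info, int k) {
  while (info.sync.load() != k) {}     // the matching load implies acquire
  return info.blockA[0];               // guaranteed to observe the packed data
}

With volatile in place of std::atomic, the compiler and CPU would be free to reorder the blockA writes past the flag update on weakly ordered architectures, which is exactly the bug 1572 scenario cited in the removed comment.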
This whole diff --git a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 33ecf10..c7bb445 100644 --- a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -312,10 +314,10 @@ struct product_selfadjoint_matrix& blocking) { product_selfadjoint_matrix::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), - EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, - LhsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), + logical_xor(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, + RhsSelfAdjoint, NumTraits::IsComplex && logical_xor(RhsSelfAdjoint, ConjugateRhs), + logical_xor(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, + LhsSelfAdjoint, NumTraits::IsComplex && logical_xor(LhsSelfAdjoint, ConjugateLhs), ColMajor,ResInnerStride> ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking); } @@ -509,8 +511,8 @@ struct selfadjoint_product_impl { eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols()); - typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); - typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); + add_const_on_value_type_t lhs = LhsBlasTraits::extract(a_lhs); + add_const_on_value_type_t rhs = RhsBlasTraits::extract(a_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs); @@ -521,10 +523,10 @@ struct selfadjoint_product_impl BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false); internal::product_selfadjoint_matrix::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, - NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), - EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, - NumTraits::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), + internal::logical_xor(LhsIsUpper, internal::traits::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, + NumTraits::IsComplex && internal::logical_xor(LhsIsUpper, bool(LhsBlasTraits::NeedToConjugate)), + internal::logical_xor(RhsIsUpper, internal::traits::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, + NumTraits::IsComplex && internal::logical_xor(RhsIsUpper, bool(RhsBlasTraits::NeedToConjugate)), internal::traits::Flags&RowMajorBit ? 
RowMajor : ColMajor, Dest::InnerStrideAtCompileTime> ::run( diff --git a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index 61396db..0e371da 100644 --- a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +++ b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -33,6 +33,8 @@ #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h index d38fd72..a62b6b5 100644 --- a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -55,12 +57,12 @@ void selfadjoint_matrix_vector_product::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; - conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; + conj_helper::IsComplex && logical_xor(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; + conj_helper::IsComplex && logical_xor(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; conj_helper cjd; - conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; - conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; + conj_helper::IsComplex && logical_xor(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; + conj_helper::IsComplex && logical_xor(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; Scalar cjAlpha = ConjugateRhs ? 
numext::conj(alpha) : alpha; @@ -167,11 +169,11 @@ struct selfadjoint_product_impl typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type ActualLhsTypeCleaned; + typedef internal::remove_all_t ActualLhsTypeCleaned; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type ActualRhsTypeCleaned; + typedef internal::remove_all_t ActualRhsTypeCleaned; enum { LhsUpLo = LhsMode&(Upper|Lower) }; @@ -181,12 +183,12 @@ struct selfadjoint_product_impl { typedef typename Dest::Scalar ResScalar; typedef typename Rhs::Scalar RhsScalar; - typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; + typedef Map, plain_enum_min(AlignedMax,internal::packet_traits::size)> MappedDest; eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); - typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); - typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); + add_const_on_value_type_t lhs = LhsBlasTraits::extract(a_lhs); + add_const_on_value_type_t rhs = RhsBlasTraits::extract(a_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs); diff --git a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h index 1238345..99a8ccd 100644 --- a/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +++ b/libs/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h @@ -33,6 +33,8 @@ #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/products/SelfadjointProduct.h b/libs/eigen/Eigen/src/Core/products/SelfadjointProduct.h index a21be80..4cbc1f7 100644 --- a/libs/eigen/Eigen/src/Core/products/SelfadjointProduct.h +++ b/libs/eigen/Eigen/src/Core/products/SelfadjointProduct.h @@ -16,6 +16,8 @@ * It corresponds to the level 3 SYRK and level 2 SYR Blas routines. **********************************************************************/ +#include "../InternalHeaderCheck.h" + namespace Eigen { @@ -26,7 +28,7 @@ struct selfadjoint_rank1_update { internal::conj_if cj; typedef Map > OtherMap; - typedef typename internal::conditional::type ConjLhsType; + typedef std::conditional_t ConjLhsType; for (Index i=0; i >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1))) @@ -55,14 +57,14 @@ struct selfadjoint_product_selector typedef typename MatrixType::Scalar Scalar; typedef internal::blas_traits OtherBlasTraits; typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType; - typedef typename internal::remove_all::type _ActualOtherType; - typename internal::add_const_on_value_type::type actualOther = OtherBlasTraits::extract(other.derived()); + typedef internal::remove_all_t ActualOtherType_; + internal::add_const_on_value_type_t actualOther = OtherBlasTraits::extract(other.derived()); Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); enum { StorageOrder = (internal::traits::Flags&RowMajorBit) ? 
RowMajor : ColMajor, - UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1 + UseOtherDirectly = ActualOtherType_::InnerStrideAtCompileTime==1 }; internal::gemv_static_vector_if static_other; @@ -70,7 +72,7 @@ struct selfadjoint_product_selector (UseOtherDirectly ? const_cast(actualOther.data()) : static_other.data())); if(!UseOtherDirectly) - Map(actualOtherPtr, actualOther.size()) = actualOther; + Map(actualOtherPtr, actualOther.size()) = actualOther; selfadjoint_rank1_update::IsComplex, @@ -87,21 +89,21 @@ struct selfadjoint_product_selector typedef typename MatrixType::Scalar Scalar; typedef internal::blas_traits OtherBlasTraits; typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType; - typedef typename internal::remove_all::type _ActualOtherType; - typename internal::add_const_on_value_type::type actualOther = OtherBlasTraits::extract(other.derived()); + typedef internal::remove_all_t ActualOtherType_; + internal::add_const_on_value_type_t actualOther = OtherBlasTraits::extract(other.derived()); Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0, - OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0 + OtherIsRowMajor = ActualOtherType_::Flags&RowMajorBit ? 1 : 0 }; Index size = mat.cols(); Index depth = actualOther.cols(); typedef internal::gemm_blocking_space BlockingType; + MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, ActualOtherType_::MaxColsAtCompileTime> BlockingType; BlockingType blocking(size, size, depth, 1, false); diff --git a/libs/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h b/libs/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h index f752a0b..fb199ad 100644 --- a/libs/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/libs/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SELFADJOINTRANK2UPTADE_H #define EIGEN_SELFADJOINTRANK2UPTADE_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -50,9 +52,8 @@ struct selfadjoint_rank2_update_selector } }; -template struct conj_expr_if - : conditional::Scalar>,T> > {}; +template +using conj_expr_if = std::conditional::Scalar>,T>>; } // end namespace internal @@ -63,13 +64,13 @@ EIGEN_DEVICE_FUNC SelfAdjointView& SelfAdjointView UBlasTraits; typedef typename UBlasTraits::DirectLinearAccessType ActualUType; - typedef typename internal::remove_all::type _ActualUType; - typename internal::add_const_on_value_type::type actualU = UBlasTraits::extract(u.derived()); + typedef internal::remove_all_t ActualUType_; + internal::add_const_on_value_type_t actualU = UBlasTraits::extract(u.derived()); typedef internal::blas_traits VBlasTraits; typedef typename VBlasTraits::DirectLinearAccessType ActualVType; - typedef typename internal::remove_all::type _ActualVType; - typename internal::add_const_on_value_type::type actualV = VBlasTraits::extract(v.derived()); + typedef internal::remove_all_t ActualVType_; + internal::add_const_on_value_type_t actualV = VBlasTraits::extract(v.derived()); // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and // vice versa, and take the complex conjugate of all coefficients and vector entries. 
@@ -80,8 +81,8 @@ EIGEN_DEVICE_FUNC SelfAdjointView& SelfAdjointView::type>::type UType; - typedef typename internal::remove_all::type>::type VType; + typedef internal::remove_all_t::type> UType; + typedef internal::remove_all_t::type> VType; internal::selfadjoint_rank2_update_selector ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha); diff --git a/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h b/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h index f0c6050..770107a 100644 --- a/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -18,10 +20,10 @@ namespace internal { // struct gemm_pack_lhs_triangular // { // Matrix::IsComplex && Conjugate> cj; -// const_blas_data_mapper lhs(_lhs,lhsStride); +// const_blas_data_mapper lhs(lhs_,lhsStride); // int count = 0; // const int peeled_mc = (rows/mr)*mr; // for(int i=0; i Traits; enum { - SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + SmallPanelWidth = 2 * plain_enum_max(Traits::mr, Traits::nr), IsLower = (Mode&Lower) == Lower, SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 }; static EIGEN_DONT_INLINE void run( Index _rows, Index _cols, Index _depth, - const Scalar* _lhs, Index lhsStride, - const Scalar* _rhs, Index rhsStride, + const Scalar* lhs_, Index lhsStride, + const Scalar* rhs_, Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking); }; @@ -110,9 +112,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( Index _rows, Index _cols, Index _depth, - const Scalar* _lhs, Index lhsStride, - const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resIncr, Index resStride, + const Scalar* lhs_, Index lhsStride, + const Scalar* rhs_, Index rhsStride, + Scalar* res_, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { // strip zeros @@ -124,9 +126,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix LhsMapper; typedef const_blas_data_mapper RhsMapper; typedef blas_data_mapper ResMapper; - LhsMapper lhs(_lhs,lhsStride); - RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride, resIncr); + LhsMapper lhs(lhs_,lhsStride); + RhsMapper rhs(rhs_,rhsStride); + ResMapper res(res_, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -247,15 +249,15 @@ struct product_triangular_matrix_matrix Traits; enum { - SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr), IsLower = (Mode&Lower) == Lower, SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 
0 : 1 }; static EIGEN_DONT_INLINE void run( Index _rows, Index _cols, Index _depth, - const Scalar* _lhs, Index lhsStride, - const Scalar* _rhs, Index rhsStride, + const Scalar* lhs_, Index lhsStride, + const Scalar* rhs_, Index rhsStride, Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking); }; @@ -268,9 +270,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( Index _rows, Index _cols, Index _depth, - const Scalar* _lhs, Index lhsStride, - const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resIncr, Index resStride, + const Scalar* lhs_, Index lhsStride, + const Scalar* rhs_, Index rhsStride, + Scalar* res_, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { const Index PacketBytes = packet_traits::size*sizeof(Scalar); @@ -283,9 +285,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix LhsMapper; typedef const_blas_data_mapper RhsMapper; typedef blas_data_mapper ResMapper; - LhsMapper lhs(_lhs,lhsStride); - RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride, resIncr); + LhsMapper lhs(lhs_,lhsStride); + RhsMapper rhs(rhs_,rhsStride); + ResMapper res(res_, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -412,13 +414,13 @@ struct triangular_product_impl typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type ActualLhsTypeCleaned; + typedef internal::remove_all_t ActualLhsTypeCleaned; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type ActualRhsTypeCleaned; - - typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); - typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); + typedef internal::remove_all_t ActualRhsTypeCleaned; + + internal::add_const_on_value_type_t lhs = LhsBlasTraits::extract(a_lhs); + internal::add_const_on_value_type_t rhs = RhsBlasTraits::extract(a_rhs); LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); @@ -451,12 +453,12 @@ struct triangular_product_impl // Apply correction if the diagonal is unit and a scalar factor was nested: if ((Mode&UnitDiag)==UnitDiag) { - if (LhsIsTriangular && lhs_alpha!=LhsScalar(1)) + if (LhsIsTriangular && !numext::is_exactly_one(lhs_alpha)) { Index diagSize = (std::min)(lhs.rows(),lhs.cols()); dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); } - else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1)) + else if ((!LhsIsTriangular) && !numext::is_exactly_one(rhs_alpha)) { Index diagSize = (std::min)(rhs.rows(),rhs.cols()); dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); diff --git a/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index a98d12e..1eb57d3 100644 --- a/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +++ b/libs/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -33,6 +33,8 @@ #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H #define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git 
a/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector.h b/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector.h index 76bfa15..df15e81 100644 --- a/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TRIANGULARMATRIXVECTOR_H #define EIGEN_TRIANGULARMATRIXVECTOR_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -209,17 +211,16 @@ template struct trmv_selector typedef typename Lhs::Scalar LhsScalar; typedef typename Rhs::Scalar RhsScalar; typedef typename Dest::Scalar ResScalar; - typedef typename Dest::RealScalar RealScalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; + typedef Map, plain_enum_min(AlignedMax,internal::packet_traits::size)> MappedDest; - typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); - typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(rhs); + add_const_on_value_type_t actualLhs = LhsBlasTraits::extract(lhs); + add_const_on_value_type_t actualRhs = RhsBlasTraits::extract(rhs); LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); @@ -235,7 +236,7 @@ template struct trmv_selector gemv_static_vector_if static_dest; - bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + bool alphaIsCompatible = (!ComplexByReal) || numext::is_exactly_zero(numext::imag(actualAlpha)); bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; RhsScalar compatibleAlpha = get_factor::run(actualAlpha); @@ -276,7 +277,7 @@ template struct trmv_selector dest = MappedDest(actualDestPtr, dest.size()); } - if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + if ( ((Mode&UnitDiag)==UnitDiag) && !numext::is_exactly_one(lhs_alpha) ) { Index diagSize = (std::min)(lhs.rows(),lhs.cols()); dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); @@ -297,10 +298,10 @@ template struct trmv_selector typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type ActualRhsTypeCleaned; + typedef internal::remove_all_t ActualRhsTypeCleaned; - typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); - typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); + std::add_const_t actualLhs = LhsBlasTraits::extract(lhs); + std::add_const_t actualRhs = RhsBlasTraits::extract(rhs); LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); @@ -335,7 +336,7 @@ template struct trmv_selector dest.data(),dest.innerStride(), actualAlpha); - if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + if ( ((Mode&UnitDiag)==UnitDiag) && !numext::is_exactly_one(lhs_alpha) ) { Index diagSize = (std::min)(lhs.rows(),lhs.cols()); dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); diff --git a/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h index 
3d47a2b..7a4d59e 100644 --- a/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +++ b/libs/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h @@ -33,6 +33,8 @@ #ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H #define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h b/libs/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h index 6d879ba..b148d9c 100644 --- a/libs/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/libs/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2009 Gael Guennebaud +// Modifications Copyright (C) 2022 Intel Corporation // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -10,10 +11,123 @@ #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H -namespace Eigen { +#include "../InternalHeaderCheck.h" + +namespace Eigen { namespace internal { +template +struct trsmKernelL { + // Generic Implementation of triangular solve for triangular matrix on left and multiple rhs. + // Handles non-packed matrices. + static void kernel( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherIncr, Index otherStride); +}; + +template +struct trsmKernelR { + // Generic Implementation of triangular solve for triangular matrix on right and multiple lhs. + // Handles non-packed matrices. + static void kernel( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherIncr, Index otherStride); +}; + +template +EIGEN_STRONG_INLINE void trsmKernelL::kernel( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherIncr, Index otherStride) + { + typedef const_blas_data_mapper TriMapper; + typedef blas_data_mapper OtherMapper; + TriMapper tri(_tri, triStride); + OtherMapper other(_other, otherStride, otherIncr); + + enum { IsLower = (Mode&Lower) == Lower }; + conj_if conj; + + // tr solve + for (Index k=0; k +EIGEN_STRONG_INLINE void trsmKernelR::kernel( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherIncr, Index otherStride) +{ + typedef typename NumTraits::Real RealScalar; + typedef blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + LhsMapper lhs(_other, otherStride, otherIncr); + RhsMapper rhs(_tri, triStride); + + enum { + RhsStorageOrder = TriStorageOrder, + IsLower = (Mode&Lower) == Lower + }; + conj_if conj; + + for (Index k=0; k struct triangular_solve_matrix @@ -44,6 +158,7 @@ struct triangular_solve_matrix& blocking); }; + template EIGEN_DONT_INLINE void triangular_solve_matrix::run( Index size, Index otherSize, @@ -53,6 +168,25 @@ EIGEN_DONT_INLINE void triangular_solve_matrix::value || + std::is_same::value)) ) { + // Very rough cutoffs to determine when to call trsm w/o packing + // For small problem sizes trsmKernel compiled with clang is generally faster. + // TODO: Investigate better heuristics for cutoffs. 
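The trsmKernelL / trsmKernelR fallbacks introduced above are plain substitution loops over unpacked data. Stripped of Eigen's blas_data_mapper plumbing and the Mode/Conjugate variants, the left-sided case is ordinary forward substitution; a self-contained sketch for a column-major lower triangle with non-unit diagonal (illustrative only, not the exact Eigen kernel):

#include <cstddef>

// Solve L * X = B in place: tri is a column-major lower-triangular matrix,
// other holds nrhs right-hand-side columns, overwritten with the solution.
void trsm_left_lower(const double* tri, std::ptrdiff_t triStride,
                     double* other, std::ptrdiff_t otherStride,
                     std::ptrdiff_t size, std::ptrdiff_t nrhs) {
  for (std::ptrdiff_t j = 0; j < nrhs; ++j) {
    double* b = other + j * otherStride;      // j-th right-hand side
    for (std::ptrdiff_t k = 0; k < size; ++k) {
      b[k] /= tri[k + k * triStride];         // divide by the pivot
      const double bk = b[k];
      for (std::ptrdiff_t i = k + 1; i < size; ++i)
        b[i] -= tri[i + k * triStride] * bk;  // eliminate below the pivot
    }
  }
}

The right-sided kernel is the same idea with the roles of the triangular and rectangular operands exchanged. The patch continues below by dispatching to these kernels without packing when the problem is small.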
+ double L2Cap = 0.5; // 50% of L2 size + if (size < avx512_trsm_cutoff(l2, cols, L2Cap)) { + trsmKernelL::kernel( + size, cols, _tri, triStride, _other, 1, otherStride); + return; + } + } +#endif + typedef const_blas_data_mapper TriMapper; typedef blas_data_mapper OtherMapper; TriMapper tri(_tri, triStride); @@ -61,7 +195,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix Traits; enum { - SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr), IsLower = (Mode&Lower) == Lower }; @@ -74,15 +208,12 @@ EIGEN_DONT_INLINE void triangular_solve_matrix conj; gebp_kernel gebp_kernel; gemm_pack_lhs pack_lhs; gemm_pack_rhs pack_rhs; // the goal here is to subdivise the Rhs panels such that we keep some cache // coherence when accessing the rhs elements - std::ptrdiff_t l1, l2, l3; - manage_caching_sizes(GetAction, &l1, &l2, &l3); Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max(otherStride,size)) : 0; subcols = std::max((subcols/Traits::nr)*Traits::nr, Traits::nr); @@ -113,38 +244,19 @@ EIGEN_DONT_INLINE void triangular_solve_matrix(actual_kc-k1, SmallPanelWidth); // tr solve - for (Index k=0; k::value || + std::is_same::value)) ) { + i = IsLower ? k2 + k1: k2 - k1 - actualPanelWidth; } +#endif + trsmKernelL::kernel( + actualPanelWidth, actual_cols, + _tri + i + (i)*triStride, triStride, + _other + i*OtherInnerStride + j2*otherStride, otherIncr, otherStride); } Index lengthTarget = actual_kc-k1-actualPanelWidth; @@ -166,7 +278,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix GEPP { Index start = IsLower ? k2+kc : 0; @@ -196,6 +308,7 @@ struct triangular_solve_matrix& blocking); }; + template EIGEN_DONT_INLINE void triangular_solve_matrix::run( Index size, Index otherSize, @@ -204,7 +317,22 @@ EIGEN_DONT_INLINE void triangular_solve_matrix& blocking) { Index rows = otherSize; - typedef typename NumTraits::Real RealScalar; + +#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS + EIGEN_IF_CONSTEXPR( (OtherInnerStride == 1 && + (std::is_same::value || + std::is_same::value)) ) { + // TODO: Investigate better heuristics for cutoffs. 
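Both cutoff blocks gate the no-copy path on avx512_trsm_cutoff(l2, ., L2Cap). Its exact formula lives in Eigen's AVX512 TRSM kernels; the general shape of such an L2-budget heuristic, sketched under that assumption and not Eigen's actual implementation, is:

#include <cmath>
#include <cstddef>

// Sketch of an L2-based cutoff: return the largest triangle size for which
// the triangle (size*size/2 entries) plus the panel of the other matrix
// (size*cols entries) fits into l2Cap * L2. Hypothetical formula.
template <typename Scalar>
std::ptrdiff_t trsm_cutoff_sketch(std::ptrdiff_t l2Bytes, std::ptrdiff_t cols, double l2Cap) {
  const double budget = l2Cap * double(l2Bytes) / double(sizeof(Scalar));
  const double c = double(cols);
  // positive root of size^2/2 + cols*size - budget = 0
  return static_cast<std::ptrdiff_t>(std::sqrt(c * c + 2.0 * budget) - c);
}

For example, with a 1 MiB L2, double precision, and 64 columns this evaluates to a cutoff of roughly 300, consistent with the intent of the comments above: only small triangles skip the packing machinery.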
+ std::ptrdiff_t l1, l2, l3; + manage_caching_sizes(GetAction, &l1, &l2, &l3); + double L2Cap = 0.5; // 50% of L2 size + if (size < avx512_trsm_cutoff(l2, rows, L2Cap)) { + trsmKernelR:: + kernel(size, rows, _tri, triStride, _other, 1, otherStride); + return; + } + } +#endif typedef blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; @@ -214,7 +342,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix Traits; enum { RhsStorageOrder = TriStorageOrder, - SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + SmallPanelWidth = plain_enum_max(Traits::mr, Traits::nr), IsLower = (Mode&Lower) == Lower }; @@ -227,7 +355,6 @@ EIGEN_DONT_INLINE void triangular_solve_matrix conj; gebp_kernel gebp_kernel; gemm_pack_rhs pack_rhs; gemm_pack_rhs pack_rhs_panel; @@ -294,27 +421,13 @@ EIGEN_DONT_INLINE void triangular_solve_matrix:: + kernel(actualPanelWidth, actual_mc, + _tri + absolute_j2 + absolute_j2*triStride, triStride, + _other + i2*OtherInnerStride + absolute_j2*otherStride, otherIncr, otherStride); } - // pack the just computed part of lhs to A pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2), actualPanelWidth, actual_mc, @@ -329,7 +442,6 @@ EIGEN_DONT_INLINE void triangular_solve_matrix LhsMapper; typedef const_blas_data_mapper RhsMapper; - typename internal::conditional< - Conjugate, - const CwiseUnaryOp,LhsMap>, - const LhsMap&> - ::type cjLhs(lhs); + std::conditional_t< + Conjugate, + const CwiseUnaryOp,LhsMap>, + const LhsMap&> cjLhs(lhs); static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; for(Index pi=IsLower ? 0 : size; IsLower ? pi0; @@ -77,7 +78,7 @@ struct triangular_solve_vector0) rhs[i] -= (cjLhs.row(i).segment(s,k).transpose().cwiseProduct(Map >(rhs+s,k))).sum(); - if((!(Mode & UnitDiag)) && numext::not_equal_strict(rhs[i],RhsScalar(0))) + if((!(Mode & UnitDiag)) && !is_identically_zero(rhs[i])) rhs[i] /= cjLhs(i,i); } } @@ -97,10 +98,10 @@ struct triangular_solve_vector(lhsStride)); typedef const_blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; - typename internal::conditional,LhsMap>, - const LhsMap& - >::type cjLhs(lhs); + std::conditional_t,LhsMap>, + const LhsMap& + > cjLhs(lhs); static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; for(Index pi=IsLower ? 
0 : size; @@ -114,7 +115,7 @@ struct triangular_solve_vector(m_data + i); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index n, Index offset = 0) const { + return ploadt_partial(m_data + i, n, offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType load(Index i) const { + return ploadt(m_data + i); + } + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { pstoret(m_data + i, p); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, const PacketType &p, Index n, Index offset = 0) const { + pstoret_partial(m_data + i, p, n, offset); + } + protected: Scalar *m_data; }; @@ -187,6 +204,9 @@ public: return VectorMapper(&operator()(i, j)); } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(Index i, Index j) const { + internal::prefetch(&operator()(i, j)); + } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { @@ -198,11 +218,26 @@ public: return ploadt(&operator()(i, j)); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index j, Index n, Index offset = 0) const { + return ploadt_partial(&operator()(i, j), n, offset); + } + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { return ploadt(&operator()(i, j)); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Index j, const PacketType &p) const { + pstoret(&operator()(i, j), p); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, Index j, const PacketType &p, Index n, Index offset = 0) const { + pstoret_partial(&operator()(i, j), p, n, offset); + } + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { pscatter(&operator()(i, j), p, m_stride); @@ -214,6 +249,7 @@ public: } EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; } + EIGEN_DEVICE_FUNC const Index incr() const { return 1; } EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC Index firstAligned(Index size) const { @@ -255,11 +291,21 @@ public: return pgather(m_data + i*m_incr.value(), m_incr.value()); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index n, Index /*offset*/ = 0) const { + return pgather_partial(m_data + i*m_incr.value(), m_incr.value(), n); + } + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { pscatter(m_data + i*m_incr.value(), p, m_incr.value()); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, const PacketType &p, Index n, Index /*offset*/ = 0) const { + pscatter_partial(m_data + i*m_incr.value(), p, m_incr.value(), n); + } + protected: Scalar *m_data; const internal::variable_if_dynamic m_incr; @@ -282,6 +328,10 @@ public: return LinearMapper(&operator()(i, j), m_incr.value()); } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(Index i, Index j) const { + internal::prefetch(&operator()(i, j)); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { return m_data[StorageOrder==RowMajor ? 
j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride]; @@ -292,11 +342,26 @@ public: return pgather(&operator()(i, j),m_incr.value()); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index j, Index n, Index /*offset*/ = 0) const { + return pgather_partial(&operator()(i, j),m_incr.value(),n); + } + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { return pgather(&operator()(i, j),m_incr.value()); } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Index j, const PacketType &p) const { + pscatter(&operator()(i, j), p, m_incr.value()); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, Index j, const PacketType &p, Index n, Index /*offset*/ = 0) const { + pscatter_partial(&operator()(i, j), p, m_incr.value(), n); + } + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { pscatter(&operator()(i, j), p, m_stride); @@ -308,17 +373,18 @@ public: } // storePacketBlock_helper defines a way to access values inside the PacketBlock, this is essentially required by the Complex types. - template + template struct storePacketBlock_helper { - storePacketBlock_helper spbh; + storePacketBlock_helper spbh; EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper* sup, Index i, Index j, const PacketBlock& block) const { spbh.store(sup, i,j,block); - for(int l = 0; l < unpacket_traits::size; l++) - { - ScalarT *v = &sup->operator()(i+l, j+idx); - *v = block.packet[idx][l]; - } + sup->template storePacket(i, j+idx, block.packet[idx]); + //for(int l = 0; l < unpacket_traits::size; l++) + //{ + // Scalar_ *v = &sup->operator()(i+l, j+idx); + // *v = *reinterpret_cast(&block.packet[idx][l]); + //} } }; @@ -328,12 +394,7 @@ public: storePacketBlock_helper, n, idx-1> spbh; EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper* sup, Index i, Index j, const PacketBlock& block) const { spbh.store(sup,i,j,block); - for(int l = 0; l < unpacket_traits::size; l++) - { - std::complex *v = &sup->operator()(i+l, j+idx); - v->real(block.packet[idx].v[2*l+0]); - v->imag(block.packet[idx].v[2*l+1]); - } + sup->template storePacket(i, j+idx, block.packet[idx]); } }; @@ -352,8 +413,8 @@ public: } }; - template - struct storePacketBlock_helper + template + struct storePacketBlock_helper { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper*, Index, Index, const PacketBlock& ) const { } @@ -378,6 +439,10 @@ public: storePacketBlock_helper spb; spb.store(this, i,j,block); } + + EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; } + EIGEN_DEVICE_FUNC const Index incr() const { return m_incr.value(); } + EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } protected: Scalar* EIGEN_RESTRICT m_data; const Index m_stride; @@ -403,7 +468,7 @@ template struct blas_traits { typedef typename traits::Scalar Scalar; typedef const XprType& ExtractType; - typedef XprType _ExtractType; + typedef XprType ExtractType_; enum { IsComplex = NumTraits::IsComplex, IsTransposed = false, @@ -414,10 +479,10 @@ template struct blas_traits ) ? 
1 : 0, HasScalarFactor = false }; - typedef typename conditional::type DirectLinearAccessType; + typename ExtractType_::PlainObject + > DirectLinearAccessType; static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; } static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); } }; @@ -498,12 +563,12 @@ struct blas_traits > typedef typename NestedXpr::Scalar Scalar; typedef blas_traits Base; typedef Transpose XprType; - typedef Transpose ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS - typedef Transpose _ExtractType; - typedef typename conditional ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS + typedef Transpose ExtractType_; + typedef std::conditional_t::type DirectLinearAccessType; + > DirectLinearAccessType; enum { IsTransposed = Base::IsTransposed ? 0 : 1 }; diff --git a/libs/eigen/Eigen/src/Core/util/ConfigureVectorization.h b/libs/eigen/Eigen/src/Core/util/ConfigureVectorization.h index af4e696..7c1a08b 100644 --- a/libs/eigen/Eigen/src/Core/util/ConfigureVectorization.h +++ b/libs/eigen/Eigen/src/Core/util/ConfigureVectorization.h @@ -30,27 +30,13 @@ * * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link * vectorized and non-vectorized code. - * - * FIXME: this code can be cleaned up once we switch to proper C++11 only. */ #if (defined EIGEN_CUDACC) #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) #define EIGEN_ALIGNOF(x) __alignof(x) -#elif EIGEN_HAS_ALIGNAS +#else #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n) #define EIGEN_ALIGNOF(x) alignof(x) -#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM - #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) - #define EIGEN_ALIGNOF(x) __alignof(x) -#elif EIGEN_COMP_MSVC - #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) - #define EIGEN_ALIGNOF(x) __alignof(x) -#elif EIGEN_COMP_SUNCC - // FIXME not sure about this one: - #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) - #define EIGEN_ALIGNOF(x) __alignof(x) -#else - #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler #endif // If the user explicitly disable vectorization, then we also disable alignment @@ -105,18 +91,12 @@ // try to keep heap alignment even when we have to disable static alignment. #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS) #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 - #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) - // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. - // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. - // 4.8 and newer seem definitely unaffected. - #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 #else #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 #endif // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ - && !EIGEN_GCC3_OR_OLDER \ && !EIGEN_COMP_SUNCC \ && !EIGEN_OS_QNX #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 @@ -201,14 +181,12 @@ // removed as gcc 4.1 and msvc 2008 are not supported anyways. 
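The ConfigureVectorization.h hunk above retires the per-compiler alignment attributes (__attribute__((aligned(n))), __declspec(align(n)), and the SunCC special case) in favor of the standard alignas/alignof keywords, keeping only the CUDA __align__ branch. The resulting pattern, shown here with hypothetical MY_ names:

// Standard C++11/14 replacement for the old compiler-specific macros.
#define MY_ALIGN_TO_BOUNDARY(n) alignas(n)
#define MY_ALIGNOF(x) alignof(x)

// e.g. a 32-byte aligned buffer suitable for aligned AVX loads:
struct MY_ALIGN_TO_BOUNDARY(32) PacketBuffer {
  float data[8];
};
static_assert(MY_ALIGNOF(PacketBuffer) == 32, "alignment applied");

Since the library now requires C++14 throughout, the #error fallback for compilers with no known alignment attribute can go as well, which is what the deleted branch did.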
#if EIGEN_COMP_MSVC #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled - #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later - // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. - #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 - #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER - #endif + // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 + #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER #endif #else - #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_COMP_GNUC ) #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC #endif #endif @@ -292,6 +270,17 @@ #ifdef __AVX512BF16__ #define EIGEN_VECTORIZE_AVX512BF16 #endif + #ifdef __AVX512FP16__ + #ifdef __AVX512VL__ + #define EIGEN_VECTORIZE_AVX512FP16 + #else + #if EIGEN_COMP_GNUC + #error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported. + #else + #error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported. + #endif + #endif + #endif #endif #endif @@ -339,7 +328,7 @@ extern "C" { // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: - #if EIGEN_COMP_ICC >= 1110 + #if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN #include #else #include @@ -438,13 +427,15 @@ #include #endif -#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380)) +#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG || EIGEN_COMP_CLANG>=380)) // We can use the optimized fp16 to float and float to fp16 conversion routines #define EIGEN_HAS_FP16_C - #if defined(EIGEN_COMP_CLANG) - // Workaround for clang: The FP16C intrinsics for clang are included by - // immintrin.h, as opposed to emmintrin.h as suggested by Intel: + #if EIGEN_COMP_GNUC + // Make sure immintrin.h is included, even if e.g. vectorization is + // explicitly disabled (see also issue #2395). + // Note that FP16C intrinsics for gcc and clang are included by immintrin.h, + // as opposed to emmintrin.h as suggested by Intel: // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711 #include #endif @@ -468,10 +459,14 @@ #include #define EIGEN_HAS_HIP_FP16 #include + #define EIGEN_HAS_HIP_BF16 + #include #endif /** \brief Namespace containing all symbols from the %Eigen library. 
*/ +#include "../InternalHeaderCheck.h" + namespace Eigen { inline static const char *SimdInstructionSetsInUse(void) { diff --git a/libs/eigen/Eigen/src/Core/util/Constants.h b/libs/eigen/Eigen/src/Core/util/Constants.h index 35dcaa7..0175087 100644 --- a/libs/eigen/Eigen/src/Core/util/Constants.h +++ b/libs/eigen/Eigen/src/Core/util/Constants.h @@ -12,6 +12,8 @@ #ifndef EIGEN_CONSTANTS_H #define EIGEN_CONSTANTS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { /** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is @@ -312,7 +314,7 @@ enum SpecializedType { }; /** \ingroup enums - * Enum containing possible values for the \p _Options template parameter of + * Enum containing possible values for the \p Options_ template parameter of * Matrix, Array and BandMatrix. */ enum StorageOptions { /** Storage order is column major (see \ref TopicStorageOrders). */ @@ -421,14 +423,16 @@ enum DecompositionOptions { /** \ingroup enums * Possible values for the \p QRPreconditioner template parameter of JacobiSVD. */ enum QRPreconditioners { - /** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */ - NoQRPreconditioner, - /** Use a QR decomposition without pivoting as the first step. */ - HouseholderQRPreconditioner, /** Use a QR decomposition with column pivoting as the first step. */ - ColPivHouseholderQRPreconditioner, + ColPivHouseholderQRPreconditioner = 0x0, + /** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */ + NoQRPreconditioner = 0x40, + /** Use a QR decomposition without pivoting as the first step. */ + HouseholderQRPreconditioner = 0x80, /** Use a QR decomposition with full pivoting as the first step. */ - FullPivHouseholderQRPreconditioner + FullPivHouseholderQRPreconditioner = 0xC0, + /** Used to disable the QR Preconditioner in BDCSVD. */ + DisableQRDecomposition = NoQRPreconditioner }; #ifdef Success @@ -529,6 +533,7 @@ struct DenseShape { static std::string debugName() { return "DenseSh struct SolverShape { static std::string debugName() { return "SolverShape"; } }; struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } }; struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } }; +struct SkewSymmetricShape { static std::string debugName() { return "SkewSymmetricShape"; } }; struct BandShape { static std::string debugName() { return "BandShape"; } }; struct TriangularShape { static std::string debugName() { return "TriangularShape"; } }; struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } }; @@ -547,7 +552,7 @@ struct IteratorBased {}; /** \internal * Constants for comparison functors */ -enum ComparisonName { +enum ComparisonName : unsigned int { cmp_EQ = 0, cmp_LT = 1, cmp_LE = 2, diff --git a/libs/eigen/Eigen/src/Core/util/DisableStupidWarnings.h b/libs/eigen/Eigen/src/Core/util/DisableStupidWarnings.h old mode 100755 new mode 100644 index fe0cfec..0865fb6 --- a/libs/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/libs/eigen/Eigen/src/Core/util/DisableStupidWarnings.h @@ -1,9 +1,10 @@ #ifndef EIGEN_WARNINGS_DISABLED #define EIGEN_WARNINGS_DISABLED -#ifdef _MSC_VER +#if defined(_MSC_VER) // 4100 - unreferenced formal parameter (occurred e.g. 
in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable + // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -19,7 +20,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -35,25 +36,28 @@ #pragma warning disable 2196 279 1684 2259 #elif defined __clang__ - // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant - // this is really a stupid warning as it warns on compile-time expressions involving enums #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma clang diagnostic push #endif - #pragma clang diagnostic ignored "-Wconstant-logical-operand" - #if __clang_major__ >= 3 && __clang_minor__ >= 5 - #pragma clang diagnostic ignored "-Wabsolute-value" - #endif - #if __clang_major__ >= 10 - #pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" - #endif - #if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L - // warning: generic selections are a C11-specific feature - // ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h - #pragma clang diagnostic ignored "-Wc11-extensions" + #if defined(__has_warning) + // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant + // this is really a stupid warning as it warns on compile-time expressions involving enums + #if __has_warning("-Wconstant-logical-operand") + #pragma clang diagnostic ignored "-Wconstant-logical-operand" + #endif + #if __has_warning("-Wimplicit-int-float-conversion") + #pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" + #endif + #if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L + // warning: generic selections are a C11-specific feature + // ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h + #if __has_warning("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + #endif #endif -#elif defined __GNUC__ +#elif defined __GNUC__ && !defined(__FUJITSU) #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic push @@ -74,25 +78,53 @@ #endif #if defined __NVCC__ - #pragma diag_suppress boolean_controlling_expr_is_constant + // MSVC 14.16 (required by CUDA 9.*) does not support the _Pragma keyword, so + // we instead use Microsoft's __pragma extension. 
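As an aside on the mechanism being introduced here: _Pragma (C++11) is an operator taking a string literal, while MSVC's __pragma keyword takes raw tokens, which is why the patch stringizes the macro argument. A minimal freestanding sketch of the same trick, with hypothetical macro names (the patch's EIGEN_MAKE_PRAGMA below is the same idea):

    // MY_DIAG_SUPPRESS(20012) expands to _Pragma("nv_diag_suppress 20012"),
    // i.e. the same directive as writing: #pragma nv_diag_suppress 20012
    #define MY_MAKE_PRAGMA(X) _Pragma(#X)
    #define MY_DIAG_SUPPRESS(X) MY_MAKE_PRAGMA(nv_diag_suppress X)
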
+ #if defined _MSC_VER + #define EIGEN_MAKE_PRAGMA(X) __pragma(#X) + #else + #define EIGEN_MAKE_PRAGMA(X) _Pragma(#X) + #endif + #if defined __NVCC_DIAG_PRAGMA_SUPPORT__ + #define EIGEN_NV_DIAG_SUPPRESS(X) EIGEN_MAKE_PRAGMA(nv_diag_suppress X) + #else + #define EIGEN_NV_DIAG_SUPPRESS(X) EIGEN_MAKE_PRAGMA(diag_suppress X) + #endif + + EIGEN_NV_DIAG_SUPPRESS(boolean_controlling_expr_is_constant) // Disable the "statement is unreachable" message - #pragma diag_suppress code_is_unreachable + EIGEN_NV_DIAG_SUPPRESS(code_is_unreachable) // Disable the "dynamic initialization in unreachable code" message - #pragma diag_suppress initialization_not_reachable + EIGEN_NV_DIAG_SUPPRESS(initialization_not_reachable) // Disable the "invalid error number" message that we get with older versions of nvcc - #pragma diag_suppress 1222 + EIGEN_NV_DIAG_SUPPRESS(1222) // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler) - #pragma diag_suppress 2527 - #pragma diag_suppress 2529 - #pragma diag_suppress 2651 - #pragma diag_suppress 2653 - #pragma diag_suppress 2668 - #pragma diag_suppress 2669 - #pragma diag_suppress 2670 - #pragma diag_suppress 2671 - #pragma diag_suppress 2735 - #pragma diag_suppress 2737 - #pragma diag_suppress 2739 + EIGEN_NV_DIAG_SUPPRESS(2527) + EIGEN_NV_DIAG_SUPPRESS(2529) + EIGEN_NV_DIAG_SUPPRESS(2651) + EIGEN_NV_DIAG_SUPPRESS(2653) + EIGEN_NV_DIAG_SUPPRESS(2668) + EIGEN_NV_DIAG_SUPPRESS(2669) + EIGEN_NV_DIAG_SUPPRESS(2670) + EIGEN_NV_DIAG_SUPPRESS(2671) + EIGEN_NV_DIAG_SUPPRESS(2735) + EIGEN_NV_DIAG_SUPPRESS(2737) + EIGEN_NV_DIAG_SUPPRESS(2739) + EIGEN_NV_DIAG_SUPPRESS(2885) + EIGEN_NV_DIAG_SUPPRESS(2888) + EIGEN_NV_DIAG_SUPPRESS(2976) + EIGEN_NV_DIAG_SUPPRESS(2979) + EIGEN_NV_DIAG_SUPPRESS(20011) + EIGEN_NV_DIAG_SUPPRESS(20014) + // Disable the "// __device__ annotation is ignored on a function(...) that is + // explicitly defaulted on its first declaration" message. + // The __device__ annotation seems to actually be needed in some cases, + // otherwise resulting in kernel runtime errors. 
+ EIGEN_NV_DIAG_SUPPRESS(2886) + EIGEN_NV_DIAG_SUPPRESS(2977) + EIGEN_NV_DIAG_SUPPRESS(20012) + #undef EIGEN_NV_DIAG_SUPPRESS + #undef EIGEN_MAKE_PRAGMA #endif #else diff --git a/libs/eigen/Eigen/src/Core/util/ForwardDeclarations.h b/libs/eigen/Eigen/src/Core/util/ForwardDeclarations.h index 2f9cc44..8f87c4a 100644 --- a/libs/eigen/Eigen/src/Core/util/ForwardDeclarations.h +++ b/libs/eigen/Eigen/src/Core/util/ForwardDeclarations.h @@ -11,6 +11,8 @@ #ifndef EIGEN_FORWARDDECLARATIONS_H #define EIGEN_FORWARDDECLARATIONS_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -49,24 +51,13 @@ template class DenseBase; template class PlainObjectBase; template class DenseCoeffsBase; -template class Matrix; template class MatrixBase; @@ -87,7 +78,6 @@ template class Transpose; template class Conjugate; template class CwiseNullaryOp; template class CwiseUnaryOp; -template class CwiseUnaryView; template class CwiseBinaryOp; template class CwiseTernaryOp; template class Solve; @@ -96,16 +86,19 @@ template class Inverse; template class Product; template class DiagonalBase; -template class DiagonalWrapper; -template class DiagonalMatrix; +template class DiagonalWrapper; +template class DiagonalMatrix; template class DiagonalProduct; template class Diagonal; +template class SkewSymmetricBase; +template class SkewSymmetricWrapper; +template class SkewSymmetricMatrix3; template class PermutationMatrix; template class Transpositions; template class PermutationBase; template class TranspositionsBase; -template class PermutationWrapper; -template class TranspositionsWrapper; +template class PermutationWrapper; +template class TranspositionsWrapper; template::has_write_access ? WriteAccessors : ReadOnlyAccessors @@ -116,7 +109,8 @@ template class OuterStride; template > class Map; template class RefBase; template,OuterStride<> >::type > class Ref; + typename StrideType = typename std::conditional_t,OuterStride<> > > class Ref; +template> class CwiseUnaryView; template class TriangularBase; template class TriangularView; @@ -142,7 +136,7 @@ template struct image_retval; } // end namespace internal namespace internal { -template class BandMatrix; +template class BandMatrix; } namespace internal { @@ -205,8 +199,12 @@ template struct scalar_cast_op; template struct scalar_random_op; template struct scalar_constant_op; template struct scalar_identity_op; -template struct scalar_sign_op; -template struct scalar_pow_op; +template struct scalar_sign_op; +template +struct scalar_pow_op; +template +struct scalar_unary_pow_op; template struct scalar_hypot_op; template struct scalar_product_op; template struct scalar_quotient_op; @@ -242,23 +240,12 @@ template struct scalar_bessel_k1e_op; struct IOFormat; // Array module -template class Array; + int MaxRows_ = Rows_, int MaxCols_ = Cols_> class Array; template class Select; template class PartialReduxExpr; template class VectorwiseOp; @@ -275,25 +262,27 @@ template class ColPivHouseholderQR; template class FullPivHouseholderQR; template class CompleteOrthogonalDecomposition; template class SVDBase; -template class JacobiSVD; -template class BDCSVD; +template class JacobiSVD; +template class BDCSVD; template class LLT; template class LDLT; template class HouseholderSequence; template class JacobiRotation; // Geometry module: -template class RotationBase; -template class Cross; +namespace internal { +template::SizeAtCompileTime> struct cross_impl; +} +template class RotationBase; template class QuaternionBase; template class Rotation2D; 
template class AngleAxis; template class Translation; template class AlignedBox; template class Quaternion; -template class Transform; -template class ParametrizedLine; -template class Hyperplane; +template class Transform; +template class ParametrizedLine; +template class Hyperplane; template class UniformScaling; template class Homogeneous; diff --git a/libs/eigen/Eigen/src/Core/util/IndexedViewHelper.h b/libs/eigen/Eigen/src/Core/util/IndexedViewHelper.h index f85de30..19fa45d 100644 --- a/libs/eigen/Eigen/src/Core/util/IndexedViewHelper.h +++ b/libs/eigen/Eigen/src/Core/util/IndexedViewHelper.h @@ -11,11 +11,17 @@ #ifndef EIGEN_INDEXED_VIEW_HELPER_H #define EIGEN_INDEXED_VIEW_HELPER_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { struct symbolic_last_tag {}; -} +} // namespace internal + +namespace placeholders { + +typedef symbolic::SymbolExpr last_t; /** \var last * \ingroup Core_Module @@ -28,38 +34,20 @@ struct symbolic_last_tag {}; * A typical usage example would be: * \code * using namespace Eigen; - * using Eigen::last; + * using Eigen::placeholders::last; * VectorXd v(n); * v(seq(2,last-2)).setOnes(); * \endcode * * \sa end */ -static const symbolic::SymbolExpr last; // PLEASE use Eigen::last instead of Eigen::placeholders::last +static const last_t last; -/** \var lastp1 - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically - * reference the last+1 element/row/columns of the underlying vector or matrix once - * passed to DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * This symbolic placeholder supports standard arithmetic operations. - * It is essentially an alias to last+fix<1>. - * - * \sa last - */ -#ifdef EIGEN_PARSED_BY_DOXYGEN -static const auto lastp1 = last+fix<1>; -#else -// Using a FixedExpr<1> expression is important here to make sure the compiler -// can fully optimize the computation starting indices with zero overhead. 
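A compilable variant of the doc snippet above (a minimal sketch, assuming the post-patch namespace layout where last lives in Eigen::placeholders):

    #include <Eigen/Dense>
    #include <iostream>
    int main() {
      using Eigen::seq;
      using Eigen::placeholders::last;
      Eigen::VectorXd v = Eigen::VectorXd::Zero(10);
      v(seq(2, last - 2)).setOnes();       // with size 10, last == 9, so indices 2..7
      std::cout << v.transpose() << "\n";  // prints: 0 0 1 1 1 1 1 1 0 0
      return 0;
    }
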
-static const symbolic::AddExpr,symbolic::ValueExpr > > lastp1(last+fix<1>()); -#endif +} // namespace placeholders namespace internal { - // Replace symbolic last/end "keywords" by their true runtime value +// Replace symbolic last/end "keywords" by their true runtime value inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } template @@ -68,7 +56,7 @@ FixedInt eval_expr_given_size(FixedInt x, Index /*size*/) { return x; } template Index eval_expr_given_size(const symbolic::BaseExpr &x, Index size) { - return x.derived().eval(last=size-1); + return x.derived().eval(Eigen::placeholders::last=size-1); } // Extract increment/step at compile time @@ -111,7 +99,7 @@ template<> struct get_compile_time_incr { // Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int) methods) template -struct IndexedViewCompatibleType::value>::type> { +struct IndexedViewCompatibleType::value>> { // Here we could simply use Array, but maybe it's less work for the compiler to use // a simpler wrapper as SingleRange //typedef Eigen::Array type; @@ -119,13 +107,13 @@ struct IndexedViewCompatibleType -struct IndexedViewCompatibleType::value>::type> { +struct IndexedViewCompatibleType::value>> { typedef SingleRange type; }; template -typename enable_if::value,SingleRange>::type +std::enable_if_t::value,SingleRange> makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { return eval_expr_given_size(id,size); } @@ -163,23 +151,44 @@ template struct get_compile_time_incr > { } // end namespace internal +namespace placeholders { + +typedef symbolic::AddExpr,symbolic::ValueExpr > > lastp1_t; +typedef Eigen::internal::all_t all_t; + +/** \var lastp1 + * \ingroup Core_Module + * + * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically + * reference the last+1 element/row/columns of the underlying vector or matrix once + * passed to DenseBase::operator()(const RowIndices&, const ColIndices&). + * + * This symbolic placeholder supports standard arithmetic operations. + * It is essentially an alias to last+fix<1>. + * + * \sa last + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +static const auto lastp1 = last+fix<1>; +#else +// Using a FixedExpr<1> expression is important here to make sure the compiler +// can fully optimize the computation starting indices with zero overhead. 
+static const lastp1_t lastp1(last+fix<1>()); +#endif + +/** \var end + * \ingroup Core_Module + * \sa lastp1 + */ +static const lastp1_t end = lastp1; /** \var all * \ingroup Core_Module * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns */ -static const Eigen::internal::all_t all; // PLEASE use Eigen::all instead of Eigen::placeholders::all +static const Eigen::internal::all_t all; - -namespace placeholders { - typedef symbolic::SymbolExpr last_t; - typedef symbolic::AddExpr,symbolic::ValueExpr > > end_t; - typedef Eigen::internal::all_t all_t; - - EIGEN_DEPRECATED static const all_t all = Eigen::all; // PLEASE use Eigen::all instead of Eigen::placeholders::all - EIGEN_DEPRECATED static const last_t last = Eigen::last; // PLEASE use Eigen::last instead of Eigen::placeholders::last - EIGEN_DEPRECATED static const end_t end = Eigen::lastp1; // PLEASE use Eigen::lastp1 instead of Eigen::placeholders::end -} +} // namespace placeholders } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/util/IntegralConstant.h b/libs/eigen/Eigen/src/Core/util/IntegralConstant.h index 945d426..ea275bd 100644 --- a/libs/eigen/Eigen/src/Core/util/IntegralConstant.h +++ b/libs/eigen/Eigen/src/Core/util/IntegralConstant.h @@ -11,6 +11,8 @@ #ifndef EIGEN_INTEGRAL_CONSTANT_H #define EIGEN_INTEGRAL_CONSTANT_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -29,10 +31,8 @@ template class VariableAndFixedInt; * - arithmetic and some bitwise operators: -, +, *, /, %, &, | * - c++98/14 compatibility with fix and fix() syntax to define integral constants. * - * It is strongly discouraged to directly deal with this class FixedInt. Instances are expcected to - * be created by the user using Eigen::fix or Eigen::fix(). In C++98-11, the former syntax does - * not create a FixedInt instance but rather a point to function that needs to be \em cleaned-up - * using the generic helper: + * It is strongly discouraged to directly deal with this class FixedInt. Instances are expected to + * be created by the user using Eigen::fix or Eigen::fix(). 
* \code * internal::cleanup_index_type::type * internal::cleanup_index_type::type @@ -53,7 +53,14 @@ template class FixedInt public: static const int value = N; EIGEN_CONSTEXPR operator int() const { return value; } - FixedInt() {} + + EIGEN_CONSTEXPR + FixedInt() = default; + + EIGEN_CONSTEXPR + FixedInt(std::integral_constant) {} + + EIGEN_CONSTEXPR FixedInt( VariableAndFixedInt other) { #ifndef EIGEN_INTERNAL_DEBUGGING EIGEN_UNUSED_VARIABLE(other); @@ -61,34 +68,41 @@ public: eigen_internal_assert(int(other)==N); } + EIGEN_CONSTEXPR FixedInt<-N> operator-() const { return FixedInt<-N>(); } + template + EIGEN_CONSTEXPR FixedInt operator+( FixedInt) const { return FixedInt(); } + template + EIGEN_CONSTEXPR FixedInt operator-( FixedInt) const { return FixedInt(); } + template + EIGEN_CONSTEXPR FixedInt operator*( FixedInt) const { return FixedInt(); } + template + EIGEN_CONSTEXPR FixedInt operator/( FixedInt) const { return FixedInt(); } + template + EIGEN_CONSTEXPR FixedInt operator%( FixedInt) const { return FixedInt(); } + template + EIGEN_CONSTEXPR FixedInt operator|( FixedInt) const { return FixedInt(); } + template + EIGEN_CONSTEXPR FixedInt operator&( FixedInt) const { return FixedInt(); } -#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES // Needed in C++14 to allow fix(): - FixedInt operator() () const { return *this; } + EIGEN_CONSTEXPR FixedInt operator() () const { return *this; } VariableAndFixedInt operator() (int val) const { return VariableAndFixedInt(val); } -#else - FixedInt ( FixedInt (*)() ) {} -#endif - -#if EIGEN_HAS_CXX11 - FixedInt(std::integral_constant) {} -#endif }; /** \internal @@ -138,12 +152,6 @@ template struct get_fixed_value,Default> { static const int value = N; }; -#if !EIGEN_HAS_CXX14 -template struct get_fixed_value (*)(),Default> { - static const int value = N; -}; -#endif - template struct get_fixed_value,Default> { static const int value = N ; }; @@ -154,9 +162,6 @@ struct get_fixed_value,Default> { }; template EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } -#if !EIGEN_HAS_CXX14 -template EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt (*)()) { return N; } -#endif // Cleanup integer/FixedInt/VariableAndFixedInt/etc types: @@ -164,38 +169,21 @@ template EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt (*)()) { r template struct cleanup_index_type { typedef T type; }; // Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index -template struct cleanup_index_type::value>::type> { typedef Index type; }; - -#if !EIGEN_HAS_CXX14 -// In c++98/c++11, fix is a pointer to function that we better cleanup to a true FixedInt: -template struct cleanup_index_type (*)(), DynamicKey> { typedef FixedInt type; }; -#endif +template struct cleanup_index_type::value>> { typedef Index type; }; // If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: template struct cleanup_index_type, DynamicKey> { typedef FixedInt type; }; // If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): template struct cleanup_index_type, DynamicKey> { typedef Index type; }; -#if EIGEN_HAS_CXX11 template struct cleanup_index_type, DynamicKey> { typedef FixedInt type; }; -#endif } // end namespace internal #ifndef EIGEN_PARSED_BY_DOXYGEN -#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES template -static const internal::FixedInt fix{}; -#else -template -inline internal::FixedInt fix() { return internal::FixedInt(); } - -// The generic typename T is mandatory. 
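For illustration, the retained C++14 form in action (a minimal sketch; only the variable-template spelling of fix survives the cleanup above):

    #include <Eigen/Dense>
    int main() {
      Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(10, 0.0, 9.0);
      // Eigen::fix<4> is a constexpr internal::FixedInt<4> object, so the
      // segment length below is a compile-time constant.
      auto head4 = v(Eigen::seqN(Eigen::fix<0>, Eigen::fix<4>));
      return head4.size() == 4 ? 0 : 1;
    }
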
Otherwise, a code like fix could refer to either the function above or this next overload. -// This way a code like fix can only refer to the previous function. -template -inline internal::VariableAndFixedInt fix(T val) { return internal::VariableAndFixedInt(internal::convert_index(val)); } -#endif +constexpr internal::FixedInt fix{}; #else // EIGEN_PARSED_BY_DOXYGEN @@ -221,14 +209,6 @@ inline internal::VariableAndFixedInt fix(T val) { return internal::VariableAn * \c std::integral_constant * Here, \c fix is thus an object of type \c internal::FixedInt. * - * In c++98/11, it is implemented as a function: - * \code - * template inline internal::FixedInt fix(); - * \endcode - * Here internal::FixedInt is thus a pointer to function. - * - * If for some reason you want a true object in c++98 then you can write: \code fix() \endcode which is also valid in c++14. - * * \sa fix(int), seq, seqN */ template diff --git a/libs/eigen/Eigen/src/Core/util/MKL_support.h b/libs/eigen/Eigen/src/Core/util/MKL_support.h old mode 100755 new mode 100644 index 17963fa..9cf5f6f --- a/libs/eigen/Eigen/src/Core/util/MKL_support.h +++ b/libs/eigen/Eigen/src/Core/util/MKL_support.h @@ -120,6 +120,8 @@ #include "../../misc/blas.h" #endif +#include "../InternalHeaderCheck.h" + namespace Eigen { typedef std::complex dcomplex; diff --git a/libs/eigen/Eigen/src/Core/util/Macros.h b/libs/eigen/Eigen/src/Core/util/Macros.h index 986c3d4..4b8b277 100644 --- a/libs/eigen/Eigen/src/Core/util/Macros.h +++ b/libs/eigen/Eigen/src/Core/util/Macros.h @@ -10,6 +10,7 @@ #ifndef EIGEN_MACROS_H #define EIGEN_MACROS_H +#include "../InternalHeaderCheck.h" //------------------------------------------------------------------------------------------ // Eigen version and basic defaults @@ -17,7 +18,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 4 -#define EIGEN_MINOR_VERSION 0 +#define EIGEN_MINOR_VERSION 90 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -86,13 +87,20 @@ #define EIGEN_COMP_LLVM 0 #endif -/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise +/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel icc compiler, 0 otherwise #if defined(__INTEL_COMPILER) #define EIGEN_COMP_ICC __INTEL_COMPILER #else #define EIGEN_COMP_ICC 0 #endif +/// \internal EIGEN_COMP_CLANGICC set to __INTEL_CLANG_COMPILER if the compiler is Intel icx compiler, 0 otherwise +#if defined(__INTEL_CLANG_COMPILER) + #define EIGEN_COMP_CLANGICC __INTEL_CLANG_COMPILER +#else + #define EIGEN_COMP_CLANGICC 0 +#endif + /// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw #if defined(__MINGW32__) #define EIGEN_COMP_MINGW 1 @@ -128,10 +136,6 @@ // For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC: // name ver MSC_VER -// 2008 9 1500 -// 2010 10 1600 -// 2012 11 1700 -// 2013 12 1800 // 2015 14 1900 // "15" 15 1900 // 2017-14.1 15.0 1910 @@ -139,6 +143,9 @@ // 2017-14.12 15.5 1912 // 2017-14.13 15.6 1913 // 2017-14.14 15.7 1914 +// 2017 15.8 1915 +// 2017 15.9 1916 +// 2019 RTW 16.0 1920 /// \internal EIGEN_COMP_MSVC_LANG set to _MSVC_LANG if the compiler is Microsoft Visual C++, 0 otherwise. 
#if defined(_MSVC_LANG)
@@ -193,9 +200,52 @@
 #define EIGEN_COMP_EMSCRIPTEN 0
 #endif

+/// \internal EIGEN_COMP_FCC set to FCC version if the compiler is Fujitsu Compiler (traditional mode)
+/// \note The Fujitsu C/C++ compiler uses the traditional mode based
+/// on EDG g++ 6.1 by default or if invoked with the -Nnoclang flag
+#if defined(__FUJITSU)
+  #define EIGEN_COMP_FCC (__FCC_major__*100+__FCC_minor__*10+__FCC_patchlevel__)
+#else
+  #define EIGEN_COMP_FCC 0
+#endif
+
+/// \internal EIGEN_COMP_CLANGFCC set to FCC version if the compiler is Fujitsu Compiler (Clang mode)
+/// \note The Fujitsu C/C++ compiler uses the non-traditional mode
+/// based on Clang 7.1.0 if invoked with the -Nclang flag
+#if defined(__CLANG_FUJITSU)
+  #define EIGEN_COMP_CLANGFCC (__FCC_major__*100+__FCC_minor__*10+__FCC_patchlevel__)
+#else
+  #define EIGEN_COMP_CLANGFCC 0
+#endif
+
+/// \internal EIGEN_COMP_CPE set to CPE version if the compiler is HPE Cray Compiler (GCC based)
+/// \note This is the SVE-enabled C/C++ compiler from the HPE Cray
+/// Programming Environment (CPE) based on Cray GCC 8.1
+#if defined(_CRAYC) && !defined(__clang__)
+  #define EIGEN_COMP_CPE (_RELEASE_MAJOR*100+_RELEASE_MINOR*10+_RELEASE_PATCHLEVEL)
+#else
+  #define EIGEN_COMP_CPE 0
+#endif
+
+/// \internal EIGEN_COMP_CLANGCPE set to CPE version if the compiler is HPE Cray Compiler (Clang based)
+/// \note This is the C/C++ compiler from the HPE Cray Programming
+/// Environment (CPE) based on Cray Clang 11.0 without SVE support
+#if defined(_CRAYC) && defined(__clang__)
+  #define EIGEN_COMP_CLANGCPE (_RELEASE_MAJOR*100+_RELEASE_MINOR*10+_RELEASE_PATCHLEVEL)
+#else
+  #define EIGEN_COMP_CLANGCPE 0
+#endif
+
+/// \internal EIGEN_COMP_LCC set to 1 if the compiler is MCST-LCC (MCST eLbrus Compiler Collection)
+#if defined(__LCC__) && defined(__MCST__)
+  #define EIGEN_COMP_LCC (__LCC__*100+__LCC_MINOR__)
+#else
+  #define EIGEN_COMP_LCC 0
+#endif
+
 /// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
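The Fujitsu and Cray macros above all encode versions as major*100 + minor*10 + patch (LCC drops the patch digit and uses major*100 + minor). A compile-time rendering of that scheme, with hypothetical values not tied to any real compiler release:

    // Fujitsu FCC 4.8.1 would yield EIGEN_COMP_FCC == 481 under this scheme.
    // A minor version or patch level above 9 would spill into the neighbouring
    // digit, so the encoding assumes single-digit components.
    constexpr int encode_version(int major, int minor, int patch) { return major * 100 + minor * 10 + patch; }
    static_assert(encode_version(4, 8, 1) == 481, "major*100 + minor*10 + patch");
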
-#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN) +#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_CLANGICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN || EIGEN_COMP_FCC || EIGEN_COMP_CLANGFCC || EIGEN_COMP_CPE || EIGEN_COMP_CLANGCPE || EIGEN_COMP_LCC) #define EIGEN_COMP_GNUC_STRICT 1 #else #define EIGEN_COMP_GNUC_STRICT 0 @@ -212,14 +262,6 @@ #define EIGEN_GNUC_AT(x,y) 0 #endif -// FIXME: could probably be removed as we do not support gcc 3.x anymore -#if EIGEN_COMP_GNUC && (__GNUC__ <= 3) -#define EIGEN_GCC3_OR_OLDER 1 -#else -#define EIGEN_GCC3_OR_OLDER 0 -#endif - - //------------------------------------------------------------------------------------------ // Architecture identification, EIGEN_ARCH_* @@ -575,13 +617,6 @@ // Detect Compiler/Architecture/OS specific features //------------------------------------------------------------------------------------------ -#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG - // see bug 89 - #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 -#else - #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 -#endif - // Cross compiler wrapper around LLVM's __has_builtin #ifdef __has_builtin # define EIGEN_HAS_BUILTIN(x) __has_builtin(x) @@ -595,16 +630,6 @@ # define __has_feature(x) 0 #endif -// Some old compilers do not support template specializations like: -// template void foo(const T x[N]); -#if !( EIGEN_COMP_CLANG && ( (EIGEN_COMP_CLANG<309) \ - || (defined(__apple_build_version__) && (__apple_build_version__ < 9000000))) \ - || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 -#else -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 -#endif - // The macro EIGEN_CPLUSPLUS is a replacement for __cplusplus/_MSVC_LANG that // works for both platforms, indicating the C++ standard version number. // @@ -622,14 +647,14 @@ #define EIGEN_CPLUSPLUS 0 #endif -// The macro EIGEN_COMP_CXXVER defines the c++ verson expected by the compiler. +// The macro EIGEN_COMP_CXXVER defines the c++ version expected by the compiler. // For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER // is defined to 17. -#if EIGEN_CPLUSPLUS > 201703L +#if EIGEN_CPLUSPLUS >= 202002L #define EIGEN_COMP_CXXVER 20 -#elif EIGEN_CPLUSPLUS > 201402L +#elif EIGEN_CPLUSPLUS >= 201703L #define EIGEN_COMP_CXXVER 17 -#elif EIGEN_CPLUSPLUS > 201103L +#elif EIGEN_CPLUSPLUS >= 201402L #define EIGEN_COMP_CXXVER 14 #elif EIGEN_CPLUSPLUS >= 201103L #define EIGEN_COMP_CXXVER 11 @@ -637,73 +662,37 @@ #define EIGEN_COMP_CXXVER 03 #endif -#ifndef EIGEN_HAS_CXX14_VARIABLE_TEMPLATES - #if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304 && EIGEN_MAX_CPP_VER>=14 - #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 1 - #else - #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 0 - #endif -#endif - // The macros EIGEN_HAS_CXX?? defines a rough estimate of available c++ features -// but in practice we should not rely on them but rather on the availabilty of +// but in practice we should not rely on them but rather on the availability of // individual features as defined later. // This is why there is no EIGEN_HAS_CXX17. -// FIXME: get rid of EIGEN_HAS_CXX14 and maybe even EIGEN_HAS_CXX11. 
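The corrected ladder above (>= against each standard's own value, rather than > against the previous one) maps __cplusplus/_MSVC_LANG onto a two-digit version number; an equivalent constexpr rendering for reference:

    constexpr int comp_cxxver(long cplusplus) {
      return cplusplus >= 202002L ? 20
           : cplusplus >= 201703L ? 17
           : cplusplus >= 201402L ? 14
           : cplusplus >= 201103L ? 11
           : 3;  // EIGEN_COMP_CXXVER 03
    }
    static_assert(comp_cxxver(201703L) == 17, "-std=c++17");
    static_assert(comp_cxxver(201402L) == 14, "-std=c++14, the new minimum");
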
-#if EIGEN_MAX_CPP_VER>=11 && EIGEN_COMP_CXXVER>=11 -#define EIGEN_HAS_CXX11 1 -#else -#define EIGEN_HAS_CXX11 0 -#endif - -#if EIGEN_MAX_CPP_VER>=14 && EIGEN_COMP_CXXVER>=14 -#define EIGEN_HAS_CXX14 1 -#else -#define EIGEN_HAS_CXX14 0 -#endif - -// Do we support r-value references? -#ifndef EIGEN_HAS_RVALUE_REFERENCES -#if EIGEN_MAX_CPP_VER>=11 && \ - (__has_feature(cxx_rvalue_references) || \ - (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600)) - #define EIGEN_HAS_RVALUE_REFERENCES 1 -#else - #define EIGEN_HAS_RVALUE_REFERENCES 0 -#endif +#if EIGEN_MAX_CPP_VER<14 || EIGEN_COMP_CXXVER<14 || (EIGEN_COMP_MSVC && EIGEN_COMP_MSVC < 1900) || \ + (EIGEN_COMP_ICC && EIGEN_COMP_ICC < 1500) || (EIGEN_COMP_NVCC && EIGEN_COMP_NVCC < 80000) || \ + (EIGEN_COMP_CLANG && ((EIGEN_COMP_CLANG<309) || (defined(__apple_build_version__) && (__apple_build_version__ < 9000000)))) || \ + (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<51) +#error This compiler appears to be too old to be supported by Eigen #endif // Does the compiler support C99? // Need to include to make sure _GLIBCXX_USE_C99 gets defined #include #ifndef EIGEN_HAS_C99_MATH -#if EIGEN_MAX_CPP_VER>=11 && \ - ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ +#if ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ - || (EIGEN_COMP_MSVC >= 1900) || defined(SYCL_DEVICE_ONLY)) + || (EIGEN_COMP_MSVC) || defined(SYCL_DEVICE_ONLY)) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 #endif #endif -// Does the compiler support result_of? -// result_of was deprecated in c++17 and removed in c++ 20 -#ifndef EIGEN_HAS_STD_RESULT_OF -#if EIGEN_HAS_CXX11 && EIGEN_COMP_CXXVER < 17 -#define EIGEN_HAS_STD_RESULT_OF 1 -#else -#define EIGEN_HAS_STD_RESULT_OF 0 -#endif -#endif - // Does the compiler support std::hash? #ifndef EIGEN_HAS_STD_HASH // The std::hash struct is defined in C++11 but is not labelled as a __device__ // function and is not constexpr, so cannot be used on device. -#if EIGEN_HAS_CXX11 && !defined(EIGEN_GPU_COMPILE_PHASE) +#if !defined(EIGEN_GPU_COMPILE_PHASE) #define EIGEN_HAS_STD_HASH 1 #else #define EIGEN_HAS_STD_HASH 0 @@ -718,128 +707,7 @@ #endif #endif -#ifndef EIGEN_HAS_ALIGNAS -#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \ - ( __has_feature(cxx_alignas) \ - || EIGEN_HAS_CXX14 \ - || (EIGEN_COMP_MSVC >= 1800) \ - || (EIGEN_GNUC_AT_LEAST(4,8)) \ - || (EIGEN_COMP_CLANG>=305) \ - || (EIGEN_COMP_ICC>=1500) \ - || (EIGEN_COMP_PGI>=1500) \ - || (EIGEN_COMP_SUNCC>=0x5130)) -#define EIGEN_HAS_ALIGNAS 1 -#else -#define EIGEN_HAS_ALIGNAS 0 -#endif -#endif - -// Does the compiler support type_traits? -// - full support of type traits was added only to GCC 5.1.0. -// - 20150626 corresponds to the last release of 4.x libstdc++ -#ifndef EIGEN_HAS_TYPE_TRAITS -#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) \ - && ((!EIGEN_COMP_GNUC_STRICT) || EIGEN_GNUC_AT_LEAST(5, 1)) \ - && ((!defined(__GLIBCXX__)) || __GLIBCXX__ > 20150626) -#define EIGEN_HAS_TYPE_TRAITS 1 -#define EIGEN_INCLUDE_TYPE_TRAITS -#else -#define EIGEN_HAS_TYPE_TRAITS 0 -#endif -#endif - -// Does the compiler support variadic templates? 
-#ifndef EIGEN_HAS_VARIADIC_TEMPLATES -#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) \ - && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_COMP_NVCC >= 80000) ) - // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: - // this prevents nvcc from crashing when compiling Eigen on Tegra X1 -#define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#elif EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) && defined(SYCL_DEVICE_ONLY) -#define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#else -#define EIGEN_HAS_VARIADIC_TEMPLATES 0 -#endif -#endif - -// Does the compiler fully support const expressions? (as in c++14) -#ifndef EIGEN_HAS_CONSTEXPR - #if defined(EIGEN_CUDACC) - // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above - #if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500)) - #define EIGEN_HAS_CONSTEXPR 1 - #endif - #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \ - (EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11))) - #define EIGEN_HAS_CONSTEXPR 1 - #endif - - #ifndef EIGEN_HAS_CONSTEXPR - #define EIGEN_HAS_CONSTEXPR 0 - #endif - -#endif // EIGEN_HAS_CONSTEXPR - -#if EIGEN_HAS_CONSTEXPR #define EIGEN_CONSTEXPR constexpr -#else -#define EIGEN_CONSTEXPR -#endif - -// Does the compiler support C++11 math? -// Let's be conservative and enable the default C++11 implementation only if we are sure it exists -#ifndef EIGEN_HAS_CXX11_MATH - #if EIGEN_MAX_CPP_VER>=11 && ((EIGEN_COMP_CXXVER > 11) || (EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ - && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)) - #define EIGEN_HAS_CXX11_MATH 1 - #else - #define EIGEN_HAS_CXX11_MATH 0 - #endif -#endif - -// Does the compiler support proper C++11 containers? -#ifndef EIGEN_HAS_CXX11_CONTAINERS - #if EIGEN_MAX_CPP_VER>=11 && \ - ((EIGEN_COMP_CXXVER > 11) \ - || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400))) - #define EIGEN_HAS_CXX11_CONTAINERS 1 - #else - #define EIGEN_HAS_CXX11_CONTAINERS 0 - #endif -#endif - -// Does the compiler support C++11 noexcept? -#ifndef EIGEN_HAS_CXX11_NOEXCEPT - #if EIGEN_MAX_CPP_VER>=11 && \ - (__has_feature(cxx_noexcept) \ - || (EIGEN_COMP_CXXVER > 11) \ - || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400))) - #define EIGEN_HAS_CXX11_NOEXCEPT 1 - #else - #define EIGEN_HAS_CXX11_NOEXCEPT 0 - #endif -#endif - -#ifndef EIGEN_HAS_CXX11_ATOMIC - #if EIGEN_MAX_CPP_VER>=11 && \ - (__has_feature(cxx_atomic) \ - || (EIGEN_COMP_CXXVER > 11) \ - || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700))) - #define EIGEN_HAS_CXX11_ATOMIC 1 - #else - #define EIGEN_HAS_CXX11_ATOMIC 0 - #endif -#endif - -#ifndef EIGEN_HAS_CXX11_OVERRIDE_FINAL - #if EIGEN_MAX_CPP_VER>=11 && \ - (EIGEN_COMP_CXXVER >= 11 || EIGEN_COMP_MSVC >= 1700) - #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 1 - #else - #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 0 - #endif -#endif // NOTE: the required Apple's clang version is very conservative // and it could be that XCode 9 works just fine. 
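With the feature probes above removed, the corresponding convenience macros collapse to their modern spellings; EIGEN_CONSTEXPR, for instance, is now unconditionally constexpr. A small sketch (assuming Eigen/Core is included; MySize is a hypothetical user type):

    struct MySize {
      EIGEN_CONSTEXPR int value() const { return 3; }  // always compiles as: constexpr int value() const
    };
    static_assert(MySize().value() == 3, "usable in constant expressions under the C++14 baseline");
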
@@ -858,7 +726,7 @@ #endif #endif -#if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR +#if defined(EIGEN_CUDACC) // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules #if defined(__NVCC__) // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr @@ -918,15 +786,11 @@ #endif #endif -// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible +// EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible // attribute to maximize inlining. This should only be used when really necessary: in particular, // it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times. // FIXME with the always_inline attribute, -// gcc 3.4.x and 4.1 reports the following compilation error: -// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval Eigen::MatrixBase::eval() const' -// : function body not available -// See also bug 1367 -#if EIGEN_GNUC_AT_LEAST(4,2) && !defined(SYCL_DEVICE_ONLY) +#if EIGEN_COMP_GNUC && !defined(SYCL_DEVICE_ONLY) #define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline #else #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE @@ -998,38 +862,7 @@ #define eigen_plain_assert(x) #endif #else - #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO - namespace Eigen { - namespace internal { - inline bool copy_bool(bool b) { return b; } - } - } #define eigen_plain_assert(x) assert(x) - #else - // work around bug 89 - #include // for abort - #include // for std::cerr - - namespace Eigen { - namespace internal { - // trivial function copying a bool. Must be EIGEN_DONT_INLINE, so we implement it after including Eigen headers. - // see bug 89. - namespace { - EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; } - } - inline void assert_fail(const char *condition, const char *function, const char *file, int line) - { - std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl; - abort(); - } - } - } - #define eigen_plain_assert(x) \ - do { \ - if(!Eigen::internal::copy_bool(x)) \ - Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \ - } while(false) - #endif #endif // eigen_assert can be overridden @@ -1067,10 +900,26 @@ #define EIGEN_UNUSED #endif +#if EIGEN_COMP_GNUC + #define EIGEN_PRAGMA(tokens) _Pragma(#tokens) + #define EIGEN_DIAGNOSTICS(tokens) EIGEN_PRAGMA(GCC diagnostic tokens) + #define EIGEN_DIAGNOSTICS_OFF(msc, gcc) EIGEN_DIAGNOSTICS(gcc) +#elif EIGEN_COMP_MSVC + #define EIGEN_PRAGMA(tokens) __pragma(tokens) + #define EIGEN_DIAGNOSTICS(tokens) EIGEN_PRAGMA(warning(tokens)) + #define EIGEN_DIAGNOSTICS_OFF(msc, gcc) EIGEN_DIAGNOSTICS(msc) +#else + #define EIGEN_PRAGMA(tokens) + #define EIGEN_DIAGNOSTICS(tokens) + #define EIGEN_DIAGNOSTICS_OFF(msc, gcc) +#endif + +#define EIGEN_DISABLE_DEPRECATED_WARNING EIGEN_DIAGNOSTICS_OFF(disable : 4996, ignored "-Wdeprecated-declarations") + // Suppresses 'unused variable' warnings. namespace Eigen { namespace internal { - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ignore_unused_variable(const T&) {} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void ignore_unused_variable(const T&) {} } } #define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var); @@ -1130,8 +979,17 @@ namespace Eigen { // General, Altivec, VSX. 
  #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  ("" : "+r,v,wa" (X));
 #elif EIGEN_ARCH_ARM_OR_ARM64
-  // General, NEON.
-  #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  ("" : "+g,w" (X));
+  #ifdef __ARM_FP
+    // General, VFP or NEON.
+    // Clang doesn't like "r",
+    // error: non-trivial scalar-to-vector conversion, possible invalid
+    //        constraint for vector type
+    #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  ("" : "+g,w" (X));
+  #else
+    // Arm without VFP or NEON.
+    // "w" constraint will not compile.
+    #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  ("" : "+g" (X));
+  #endif
 #elif EIGEN_ARCH_i386_OR_x86_64
   // General, SSE.
   #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  ("" : "+g,x" (X));
@@ -1185,8 +1043,8 @@ namespace Eigen {
 #define EIGEN_USING_STD(FUNC) using std::FUNC;
 #endif
-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || (EIGEN_COMP_MSVC == 1900 && EIGEN_COMP_NVCC))
-  // For older MSVC versions, as well as 1900 && CUDA 8, using the base operator is necessary,
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_NVCC
+  // When compiling with NVCC, using the base operator is necessary,
   // otherwise we get duplicate definition errors
   // For later MSVC versions, we require explicit operator= definition, otherwise we get
   // use of implicitly deleted operator errors.
@@ -1215,11 +1073,7 @@ namespace Eigen {
  * \brief Macro to explicitly define the default copy constructor.
  * This is necessary, because the implicit definition is deprecated if the copy-assignment is overridden.
  */
-#if EIGEN_HAS_CXX11
-#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) CLASS(const CLASS&) = default;
-#else
-#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS)
-#endif
+#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) EIGEN_DEVICE_FUNC CLASS(const CLASS&) = default;
@@ -1239,15 +1093,9 @@ namespace Eigen {
  *
  * Hiding the default destructor lead to problems in C++03 mode together with boost::multiprecision
  */
-#if EIGEN_HAS_CXX11
 #define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \
-    Derived() = default; \
-    ~Derived() = default;
-#else
-#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \
-    Derived() {}; \
-    /* ~Derived() {}; */
-#endif
+    EIGEN_DEVICE_FUNC Derived() = default; \
+    EIGEN_DEVICE_FUNC ~Derived() = default;
@@ -1285,35 +1133,6 @@ namespace Eigen {
   typedef typename Base::PacketScalar PacketScalar;
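Spelled out, the simplified member-defaulting macros above now always produce device-annotated defaults (a sketch; MyXpr is hypothetical and Eigen/Core is assumed to be included):

    class MyXpr {
    public:
      // Expands to: EIGEN_DEVICE_FUNC MyXpr(const MyXpr&) = default;
      EIGEN_DEFAULT_COPY_CONSTRUCTOR(MyXpr)
      // Expands to defaulted, EIGEN_DEVICE_FUNC-annotated ctor and dtor.
      EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MyXpr)
      // Declaring copy-assignment is what would otherwise deprecate the
      // implicit copy constructor, hence the explicit default above.
      MyXpr& operator=(const MyXpr&) { return *this; }
    };
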
(int)a \ - : ((int)a <= (int)b) ? (int)a : (int)b) - -// see EIGEN_SIZE_MIN_PREFER_DYNAMIC. No need for a separate variant for MaxSizes here. -#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \ - : ((int)a >= (int)b) ? (int)a : (int)b) - -#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b))) - -#define EIGEN_IMPLIES(a,b) (!(a) || (b)) - #if EIGEN_HAS_BUILTIN(__builtin_expect) || EIGEN_COMP_GNUC #define EIGEN_PREDICT_FALSE(x) (__builtin_expect(x, false)) #define EIGEN_PREDICT_TRUE(x) (__builtin_expect(false || (x), true)) @@ -1352,16 +1171,9 @@ namespace Eigen { CwiseBinaryOp::Scalar>, \ const typename internal::plain_constant_type::type, const EXPR> -// Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010") -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC_STRICT<=1600) -#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if::type -#else -#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X -#endif - #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ - EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg::type,OPNAME))\ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg::type,OPNAME)\ (METHOD)(const T& scalar) const { \ typedef typename internal::promote_scalar_arg::type PromotedT; \ return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \ @@ -1370,7 +1182,7 @@ namespace Eigen { #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend \ - EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg::type,Derived,OPNAME)) \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg::type,Derived,OPNAME) \ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ typedef typename internal::promote_scalar_arg::type PromotedT; \ return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \ @@ -1408,26 +1220,12 @@ namespace Eigen { #endif -#if EIGEN_HAS_CXX11_NOEXCEPT -# define EIGEN_INCLUDE_TYPE_TRAITS -# define EIGEN_NOEXCEPT noexcept -# define EIGEN_NOEXCEPT_IF(x) noexcept(x) -# define EIGEN_NO_THROW noexcept(true) -# define EIGEN_EXCEPTION_SPEC(X) noexcept(false) -#else -# define EIGEN_NOEXCEPT -# define EIGEN_NOEXCEPT_IF(x) -# define EIGEN_NO_THROW throw() -# if EIGEN_COMP_MSVC || EIGEN_COMP_CXXVER>=17 - // MSVC does not support exception specifications (warning C4290), - // and they are deprecated in c++11 anyway. This is even an error in c++17. -# define EIGEN_EXCEPTION_SPEC(X) throw() -# else -# define EIGEN_EXCEPTION_SPEC(X) throw(X) -# endif -#endif +#define EIGEN_NOEXCEPT noexcept +#define EIGEN_NOEXCEPT_IF(x) noexcept(x) +#define EIGEN_NO_THROW noexcept(true) +#define EIGEN_EXCEPTION_SPEC(X) noexcept(false) + -#if EIGEN_HAS_VARIADIC_TEMPLATES // The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input. namespace Eigen { namespace internal { @@ -1439,16 +1237,10 @@ bool all(T t, Ts ... 
ts){ return t && all(ts...); } } } -#endif -#if EIGEN_HAS_CXX11_OVERRIDE_FINAL // provide override and final specifiers if they are available: -# define EIGEN_OVERRIDE override -# define EIGEN_FINAL final -#else -# define EIGEN_OVERRIDE -# define EIGEN_FINAL -#endif +#define EIGEN_OVERRIDE override +#define EIGEN_FINAL final // Wrapping #pragma unroll in a macro since it is required for SYCL #if defined(SYCL_DEVICE_ONLY) @@ -1461,4 +1253,12 @@ bool all(T t, Ts ... ts){ return t && all(ts...); } #define EIGEN_UNROLL_LOOP #endif +// Notice: Use this macro with caution. The code in the if body should still +// compile with C++14. +#if defined(EIGEN_HAS_CXX17_IFCONSTEXPR) +#define EIGEN_IF_CONSTEXPR(X) if constexpr (X) +#else +#define EIGEN_IF_CONSTEXPR(X) if (X) +#endif + #endif // EIGEN_MACROS_H diff --git a/libs/eigen/Eigen/src/Core/util/Memory.h b/libs/eigen/Eigen/src/Core/util/Memory.h index 875318c..e4a8793 100644 --- a/libs/eigen/Eigen/src/Core/util/Memory.h +++ b/libs/eigen/Eigen/src/Core/util/Memory.h @@ -59,6 +59,8 @@ #endif +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -94,19 +96,17 @@ inline void throw_std_bad_alloc() /* ----- Hand made implementations of aligned malloc/free and realloc ----- */ -/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned. - * Fast, but wastes 16 additional bytes of memory. Does not throw any exception. +/** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`. + * Fast, but wastes `alignment` additional bytes of memory. Does not throw any exception. */ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) { - eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2"); - - EIGEN_USING_STD(malloc) - void *original = malloc(size+alignment); - + eigen_assert(alignment >= sizeof(void*) && alignment <= 128 && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*), less than or equal to 128, and a power of 2"); + void* original = std::malloc(size + alignment); if (original == 0) return 0; - void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(std::size_t(alignment-1))) + alignment); - *(reinterpret_cast(aligned) - 1) = original; + uint8_t offset = static_cast(alignment - (reinterpret_cast(original) & (alignment - 1))); + void* aligned = static_cast(static_cast(original) + offset); + *(static_cast(aligned) - 1) = offset; return aligned; } @@ -114,8 +114,9 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::si EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr) { if (ptr) { - EIGEN_USING_STD(free) - free(*(reinterpret_cast(ptr) - 1)); + uint8_t offset = static_cast(*(static_cast(ptr) - 1)); + void* original = static_cast(static_cast(ptr) - offset); + std::free(original); } } @@ -124,19 +125,22 @@ EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr) * Since we know that our handmade version is based on std::malloc * we can use std::realloc to implement efficient reallocation. 
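The rewritten helpers above replace the stored void* with a single offset byte kept just before the aligned address, which is why the new assertion caps the alignment at 128 (so the offset, at most equal to the alignment, fits in a uint8_t). The same scheme in freestanding form, as a sketch rather than Eigen's actual code:

    #include <cstdlib>
    #include <cstdint>
    // alignment must be a power of two and at most 255 (Eigen asserts <= 128).
    void* my_aligned_malloc(std::size_t size, std::size_t alignment) {
      // Over-allocate by 'alignment' so an aligned address with at least one
      // spare leading byte always exists inside the block.
      void* original = std::malloc(size + alignment);
      if (original == nullptr) return nullptr;
      std::uint8_t offset = static_cast<std::uint8_t>(
          alignment - (reinterpret_cast<std::uintptr_t>(original) & (alignment - 1)));
      void* aligned = static_cast<std::uint8_t*>(original) + offset;  // offset is in [1, alignment]
      static_cast<std::uint8_t*>(aligned)[-1] = offset;               // remember how far we moved
      return aligned;
    }
    void my_aligned_free(void* ptr) {
      if (ptr == nullptr) return;
      std::uint8_t offset = static_cast<std::uint8_t*>(ptr)[-1];
      std::free(static_cast<std::uint8_t*>(ptr) - offset);
    }
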
*/ -inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0) +EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) { - if (ptr == 0) return handmade_aligned_malloc(size); - void *original = *(reinterpret_cast(ptr) - 1); - std::ptrdiff_t previous_offset = static_cast(ptr)-static_cast(original); - original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES); + if (ptr == 0) return handmade_aligned_malloc(new_size, alignment); + uint8_t old_offset = *(static_cast(ptr) - 1); + void* old_original = static_cast(ptr) - old_offset; + void* original = std::realloc(old_original, new_size + alignment); if (original == 0) return 0; - void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); - void *previous_aligned = static_cast(original)+previous_offset; - if(aligned!=previous_aligned) - std::memmove(aligned, previous_aligned, size); - - *(reinterpret_cast(aligned) - 1) = original; + if (original == old_original) return ptr; + uint8_t offset = static_cast(alignment - (reinterpret_cast(original) & (alignment - 1))); + void* aligned = static_cast(static_cast(original) + offset); + if (offset != old_offset) { + const void* src = static_cast(static_cast(original) + old_offset); + std::size_t count = (std::min)(new_size, old_size); + std::memmove(aligned, src, count); + } + *(static_cast(aligned) - 1) = offset; return aligned; } @@ -212,12 +216,12 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) * \brief Reallocates an aligned block of memory. * \throws std::bad_alloc on allocation failure */ -inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) +EIGEN_DEVICE_FUNC inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) { - EIGEN_UNUSED_VARIABLE(old_size) - + if (ptr == 0) return aligned_malloc(new_size); void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED + EIGEN_UNUSED_VARIABLE(old_size) result = std::realloc(ptr,new_size); #else result = handmade_aligned_realloc(ptr,new_size,old_size); @@ -226,6 +230,11 @@ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_si if (!result && new_size) throw_std_bad_alloc(); +#ifdef EIGEN_RUNTIME_NO_MALLOC + if (result != ptr) + check_that_malloc_is_allowed(); +#endif + return result; } @@ -265,12 +274,12 @@ template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *p free(ptr); } -template inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) +template EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) { return aligned_realloc(ptr, new_size, old_size); } -template<> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t) +template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t) { return std::realloc(ptr, new_size); } @@ -292,20 +301,55 @@ template EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T /** \internal Constructs the elements of an array. * The \a size parameter tells on how many objects to call the constructor of T. 
*/ -template EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size) +template EIGEN_DEVICE_FUNC inline T* default_construct_elements_of_array(T *ptr, std::size_t size) { - std::size_t i; + std::size_t i=0; EIGEN_TRY { for (i = 0; i < size; ++i) ::new (ptr + i) T; - return ptr; } EIGEN_CATCH(...) { destruct_elements_of_array(ptr, i); EIGEN_THROW; } - return NULL; + return ptr; +} + +/** \internal Copy-constructs the elements of an array. + * The \a size parameter tells on how many objects to copy. + */ +template EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T *ptr, const T* src, std::size_t size) +{ + std::size_t i=0; + EIGEN_TRY + { + for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i)); + } + EIGEN_CATCH(...) + { + destruct_elements_of_array(ptr, i); + EIGEN_THROW; + } + return ptr; +} + +/** \internal Move-constructs the elements of an array. + * The \a size parameter tells on how many objects to move. + */ +template EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T *ptr, T* src, std::size_t size) +{ + std::size_t i=0; + EIGEN_TRY + { + for (i = 0; i < size; ++i) ::new (ptr + i) T(std::move(*(src + i))); + } + EIGEN_CATCH(...) + { + destruct_elements_of_array(ptr, i); + EIGEN_THROW; + } + return ptr; } /***************************************************************************** @@ -326,10 +370,10 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t s template EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) { check_size_for_overflow(size); - T *result = reinterpret_cast(aligned_malloc(sizeof(T)*size)); + T *result = static_cast(aligned_malloc(sizeof(T)*size)); EIGEN_TRY { - return construct_elements_of_array(result, size); + return default_construct_elements_of_array(result, size); } EIGEN_CATCH(...) { @@ -342,10 +386,10 @@ template EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) { check_size_for_overflow(size); - T *result = reinterpret_cast(conditional_aligned_malloc(sizeof(T)*size)); + T *result = static_cast(conditional_aligned_malloc(sizeof(T)*size)); EIGEN_TRY { - return construct_elements_of_array(result, size); + return default_construct_elements_of_array(result, size); } EIGEN_CATCH(...) { @@ -377,21 +421,32 @@ template EIGEN_DEVICE_FUNC inline T* conditional_aligned { check_size_for_overflow(new_size); check_size_for_overflow(old_size); - if(new_size < old_size) - destruct_elements_of_array(pts+new_size, old_size-new_size); - T *result = reinterpret_cast(conditional_aligned_realloc(reinterpret_cast(pts), sizeof(T)*new_size, sizeof(T)*old_size)); - if(new_size > old_size) + + // If elements need to be explicitly initialized, we cannot simply realloc + // (or memcpy) the memory block - each element needs to be reconstructed. + // Otherwise, objects that contain internal pointers like mpfr or + // AnnoyingScalar can be pointing to the wrong thing. + T* result = static_cast(conditional_aligned_malloc(sizeof(T)*new_size)); + EIGEN_TRY { - EIGEN_TRY - { - construct_elements_of_array(result+old_size, new_size-old_size); - } - EIGEN_CATCH(...) - { - conditional_aligned_free(result); - EIGEN_THROW; + // Move-construct initial elements. + std::size_t copy_size = (std::min)(old_size, new_size); + move_construct_elements_of_array(result, pts, copy_size); + + // Default-construct remaining elements. 
+ if (new_size > old_size) { + default_construct_elements_of_array(result + copy_size, new_size - old_size); } + + // Delete old elements. + conditional_aligned_delete(pts, old_size); } + EIGEN_CATCH(...) + { + conditional_aligned_free(result); + EIGEN_THROW; + } + return result; } @@ -401,12 +456,12 @@ template EIGEN_DEVICE_FUNC inline T* conditional_aligned if(size==0) return 0; // short-cut. Also fixes Bug 884 check_size_for_overflow(size); - T *result = reinterpret_cast(conditional_aligned_malloc(sizeof(T)*size)); + T *result = static_cast(conditional_aligned_malloc(sizeof(T)*size)); if(NumTraits::RequireInitialization) { EIGEN_TRY { - construct_elements_of_array(result, size); + default_construct_elements_of_array(result, size); } EIGEN_CATCH(...) { @@ -417,26 +472,15 @@ template EIGEN_DEVICE_FUNC inline T* conditional_aligned return result; } -template inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) +template EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) { + if (NumTraits::RequireInitialization) { + return conditional_aligned_realloc_new(pts, new_size, old_size); + } + check_size_for_overflow(new_size); check_size_for_overflow(old_size); - if(NumTraits::RequireInitialization && (new_size < old_size)) - destruct_elements_of_array(pts+new_size, old_size-new_size); - T *result = reinterpret_cast(conditional_aligned_realloc(reinterpret_cast(pts), sizeof(T)*new_size, sizeof(T)*old_size)); - if(NumTraits::RequireInitialization && (new_size > old_size)) - { - EIGEN_TRY - { - construct_elements_of_array(result+old_size, new_size-old_size); - } - EIGEN_CATCH(...) - { - conditional_aligned_free(result); - EIGEN_THROW; - } - } - return result; + return static_cast(conditional_aligned_realloc(static_cast(pts), sizeof(T)*new_size, sizeof(T)*old_size)); } template EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size) @@ -566,17 +610,10 @@ template struct smart_memmove_helper { } }; -#if EIGEN_HAS_RVALUE_REFERENCES template EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) { return std::move(start, end, target); } -#else -template EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) -{ - return std::copy(start, end, target); -} -#endif /***************************************************************************** *** Implementation of runtime stack allocation (falling back to malloc) *** @@ -617,7 +654,7 @@ template class aligned_stack_memory_handler : noncopyable : m_ptr(ptr), m_size(size), m_deallocate(dealloc) { if(NumTraits::RequireInitialization && m_ptr) - Eigen::internal::construct_elements_of_array(m_ptr, size); + Eigen::internal::default_construct_elements_of_array(m_ptr, size); } EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() @@ -640,7 +677,7 @@ template struct local_nested_eval_wrapper { - static const bool NeedExternalBuffer = false; + static constexpr bool NeedExternalBuffer = false; typedef typename Xpr::Scalar Scalar; typedef typename nested_eval::type ObjectType; ObjectType object; @@ -656,7 +693,7 @@ struct local_nested_eval_wrapper template struct local_nested_eval_wrapper { - static const bool NeedExternalBuffer = true; + static constexpr bool NeedExternalBuffer = true; typedef typename Xpr::Scalar Scalar; typedef typename plain_object_eval::type PlainObject; typedef Map ObjectType; @@ -668,7 +705,7 @@ struct local_nested_eval_wrapper m_deallocate(ptr==0) { if(NumTraits::RequireInitialization && 
object.data()) - Eigen::internal::construct_elements_of_array(object.data(), object.size()); + Eigen::internal::default_construct_elements_of_array(object.data(), object.size()); object = xpr; } @@ -853,7 +890,7 @@ template void swap(scoped_array &a,scoped_array &b) /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with types requiring a non standrad alignment. +* \brief STL compatible allocator to use with types requiring a non-standard alignment. * * The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. * By default, it will thus provide at least 16 bytes alignment and more in following cases: @@ -941,7 +978,7 @@ public: __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) ); # endif # elif EIGEN_COMP_MSVC -# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64 +# if EIGEN_ARCH_i386_OR_x86_64 # define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id) # endif # endif @@ -1156,6 +1193,38 @@ inline int queryTopLevelCacheSize() return (std::max)(l2,l3); } + + +/** \internal + * This wraps C++20's std::construct_at, using placement new instead if it is not available. + */ + +#if EIGEN_COMP_CXXVER >= 20 +using std::construct_at; +#else +template +EIGEN_DEVICE_FUNC T* construct_at( T* p, Args&&... args ) +{ + return ::new (const_cast(static_cast(p))) + T(std::forward(args)...); +} +#endif + +/** \internal + * This wraps C++17's std::destroy_at. If it's not available it calls the destructor. + * The wrapper is not a full replacement for C++20's std::destroy_at as it cannot + * be applied to std::array. + */ +#if EIGEN_COMP_CXXVER >= 17 +using std::destroy_at; +#else +template +EIGEN_DEVICE_FUNC void destroy_at(T* p) +{ + p->~T(); +} +#endif + } // end namespace internal } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/Core/util/Meta.h b/libs/eigen/Eigen/src/Core/util/Meta.h old mode 100755 new mode 100644 index 81ae2a3..6c6fb71 --- a/libs/eigen/Eigen/src/Core/util/Meta.h +++ b/libs/eigen/Eigen/src/Core/util/Meta.h @@ -11,6 +11,8 @@ #ifndef EIGEN_META_H #define EIGEN_META_H +#include "../InternalHeaderCheck.h" + #if defined(EIGEN_GPU_COMPILE_PHASE) #include @@ -26,11 +28,11 @@ #endif // Recent versions of ICC require for pointer types below. -#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600 && EIGEN_COMP_CXXVER >= 11) +#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600) // Define portable (u)int{32,64} types -#if EIGEN_HAS_CXX11 || EIGEN_ICC_NEEDS_CSTDINT #include + namespace Eigen { namespace numext { typedef std::uint8_t uint8_t; @@ -41,25 +43,34 @@ typedef std::uint32_t uint32_t; typedef std::int32_t int32_t; typedef std::uint64_t uint64_t; typedef std::int64_t int64_t; + +template +struct get_integer_by_size { + typedef void signed_type; + typedef void unsigned_type; +}; +template <> +struct get_integer_by_size<1> { + typedef int8_t signed_type; + typedef uint8_t unsigned_type; +}; +template <> +struct get_integer_by_size<2> { + typedef int16_t signed_type; + typedef uint16_t unsigned_type; +}; +template <> +struct get_integer_by_size<4> { + typedef int32_t signed_type; + typedef uint32_t unsigned_type; +}; +template <> +struct get_integer_by_size<8> { + typedef int64_t signed_type; + typedef uint64_t unsigned_type; +}; } } -#else -// Without c++11, all compilers able to compile Eigen also -// provide the C99 stdint.h header file. 
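Returning to Memory.h for a moment: the `construct_at`/`destroy_at` wrappers added there can be driven on raw storage as below. This is a sketch assuming `<Eigen/Core>` pulls the wrappers in; pre-C++20 they fall back to placement new and an explicit destructor call, exactly as their comments state.

```cpp
#include <Eigen/Core>
#include <string>

void construct_at_demo() {
  // Raw, suitably aligned storage for one std::string.
  alignas(std::string) unsigned char buf[sizeof(std::string)];
  std::string* s =
      Eigen::internal::construct_at(reinterpret_cast<std::string*>(buf), "hello");
  s->append(" world");             // the object is alive here
  Eigen::internal::destroy_at(s);  // ends its lifetime; runs ~string()
}
```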
-#include -namespace Eigen { -namespace numext { -typedef ::uint8_t uint8_t; -typedef ::int8_t int8_t; -typedef ::uint16_t uint16_t; -typedef ::int16_t int16_t; -typedef ::uint32_t uint32_t; -typedef ::int32_t int32_t; -typedef ::uint64_t uint64_t; -typedef ::int64_t int64_t; -} -} -#endif namespace Eigen { @@ -105,23 +116,11 @@ struct bool_constant : true_type {}; template<> struct bool_constant : false_type {}; -template -struct conditional { typedef Then type; }; - -template -struct conditional { typedef Else type; }; - -template struct remove_reference { typedef T type; }; -template struct remove_reference { typedef T type; }; - -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; - -template struct remove_const { typedef T type; }; -template struct remove_const { typedef T type; }; -template struct remove_const { typedef T type[]; }; -template struct remove_const { typedef T type[Size]; }; +// Third-party libraries rely on these. +using std::conditional; +using std::remove_reference; +using std::remove_pointer; +using std::remove_const; template struct remove_all { typedef T type; }; template struct remove_all { typedef typename remove_all::type type; }; @@ -130,6 +129,9 @@ template struct remove_all { typedef typename remove_all< template struct remove_all { typedef typename remove_all::type type; }; template struct remove_all { typedef typename remove_all::type type; }; +template +using remove_all_t = typename remove_all::type; + template struct is_arithmetic { enum { value = false }; }; template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; @@ -149,64 +151,13 @@ template struct is_same { enum { value = 0 }; }; template struct is_same { enum { value = 1 }; }; template< class T > -struct is_void : is_same::type> {}; +struct is_void : is_same> {}; -#if EIGEN_HAS_CXX11 template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; using std::is_integral; -#else -template struct is_integral { enum { value = false }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -#if EIGEN_COMP_MSVC -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -#endif -#endif -#if EIGEN_HAS_CXX11 using std::make_unsigned; -#else -// TODO: Possibly improve this implementation of make_unsigned. -// It is currently used only by -// template struct random_default_impl. 
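`remove_all` (and the new `remove_all_t` shorthand) has no single `std::` equivalent, which is why it stays while `conditional`, `remove_reference`, `remove_pointer`, and `remove_const` are now taken from the standard library. It peels off any stack of const, reference, and pointer layers, so both of these hold (assuming `<Eigen/Core>` is included):

```cpp
#include <Eigen/Core>
#include <type_traits>

static_assert(std::is_same<Eigen::internal::remove_all_t<const float&>, float>::value, "");
static_assert(std::is_same<Eigen::internal::remove_all_t<const double* const>, double>::value, "");
```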
-template struct make_unsigned; -template<> struct make_unsigned { typedef unsigned char type; }; -template<> struct make_unsigned { typedef unsigned char type; }; -template<> struct make_unsigned { typedef unsigned char type; }; -template<> struct make_unsigned { typedef unsigned short type; }; -template<> struct make_unsigned { typedef unsigned short type; }; -template<> struct make_unsigned { typedef unsigned int type; }; -template<> struct make_unsigned { typedef unsigned int type; }; -template<> struct make_unsigned { typedef unsigned long type; }; -template<> struct make_unsigned { typedef unsigned long type; }; -#if EIGEN_COMP_MSVC -template<> struct make_unsigned { typedef unsigned __int64 type; }; -template<> struct make_unsigned { typedef unsigned __int64 type; }; -#endif - -// Some platforms define int64_t as `long long` even for C++03, where -// `long long` is not guaranteed by the standard. In this case we are missing -// the definition for make_unsigned. If we just define it, we run into issues -// where `long long` doesn't exist in some compilers for C++03. We therefore add -// the specialization for these platforms only. -#if EIGEN_OS_MAC || EIGEN_COMP_MINGW -template<> struct make_unsigned { typedef unsigned long long type; }; -template<> struct make_unsigned { typedef unsigned long long type; }; -#endif -#endif - -template struct add_const { typedef const T type; }; -template struct add_const { typedef T& type; }; template struct is_const { enum { value = 0 }; }; template struct is_const { enum { value = 1 }; }; @@ -217,205 +168,11 @@ template struct add_const_on_value_type { typedef T const template struct add_const_on_value_type { typedef T const* const type; }; template struct add_const_on_value_type { typedef T const* const type; }; -#if EIGEN_HAS_CXX11 +template +using add_const_on_value_type_t = typename add_const_on_value_type::type; using std::is_convertible; -#else - -template -struct is_convertible_impl -{ -private: - struct any_conversion - { - template any_conversion(const volatile T&); - template any_conversion(T&); - }; - struct yes {int a[1];}; - struct no {int a[2];}; - - template - static yes test(T, int); - - template - static no test(any_conversion, ...); - -public: - static typename internal::remove_reference::type* ms_from; -#ifdef __INTEL_COMPILER - #pragma warning push - #pragma warning ( disable : 2259 ) -#endif - enum { value = sizeof(test(*ms_from, 0))==sizeof(yes) }; -#ifdef __INTEL_COMPILER - #pragma warning pop -#endif -}; - -template -struct is_convertible -{ - enum { value = is_convertible_impl::value }; -}; - -template -struct is_convertible { enum { value = false }; }; - -template -struct is_convertible { enum { value = true }; }; - -#endif - -/** \internal Allows to enable/disable an overload - * according to a compile time condition. 
- */ -template struct enable_if; - -template struct enable_if -{ typedef T type; }; - -#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 -#if !defined(__FLT_EPSILON__) -#define __FLT_EPSILON__ FLT_EPSILON -#define __DBL_EPSILON__ DBL_EPSILON -#endif - -namespace device { - -template struct numeric_limits -{ - EIGEN_DEVICE_FUNC - static EIGEN_CONSTEXPR T epsilon() { return 0; } - static T (max)() { assert(false && "Highest not supported for this type"); } - static T (min)() { assert(false && "Lowest not supported for this type"); } - static T infinity() { assert(false && "Infinity not supported for this type"); } - static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static float epsilon() { return __FLT_EPSILON__; } - EIGEN_DEVICE_FUNC - static float (max)() { - #if defined(EIGEN_CUDA_ARCH) - return CUDART_MAX_NORMAL_F; - #else - return HIPRT_MAX_NORMAL_F; - #endif - } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static float (min)() { return FLT_MIN; } - EIGEN_DEVICE_FUNC - static float infinity() { - #if defined(EIGEN_CUDA_ARCH) - return CUDART_INF_F; - #else - return HIPRT_INF_F; - #endif - } - EIGEN_DEVICE_FUNC - static float quiet_NaN() { - #if defined(EIGEN_CUDA_ARCH) - return CUDART_NAN_F; - #else - return HIPRT_NAN_F; - #endif - } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static double epsilon() { return __DBL_EPSILON__; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static double (max)() { return DBL_MAX; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static double (min)() { return DBL_MIN; } - EIGEN_DEVICE_FUNC - static double infinity() { - #if defined(EIGEN_CUDA_ARCH) - return CUDART_INF; - #else - return HIPRT_INF; - #endif - } - EIGEN_DEVICE_FUNC - static double quiet_NaN() { - #if defined(EIGEN_CUDA_ARCH) - return CUDART_NAN; - #else - return HIPRT_NAN; - #endif - } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static int epsilon() { return 0; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static int (max)() { return INT_MAX; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static int (min)() { return INT_MIN; } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned int epsilon() { return 0; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned int (max)() { return UINT_MAX; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned int (min)() { return 0; } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static long epsilon() { return 0; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static long (max)() { return LONG_MAX; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static long (min)() { return LONG_MIN; } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned long epsilon() { return 0; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned long (max)() { return ULONG_MAX; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned long (min)() { return 0; } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static long long epsilon() { return 0; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static long long (max)() { return LLONG_MAX; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static long long (min)() { return LLONG_MIN; } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned long long epsilon() { return 0; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned long long 
(max)() { return ULLONG_MAX; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static unsigned long long (min)() { return 0; } -}; -template<> struct numeric_limits -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static bool epsilon() { return false; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static bool (max)() { return true; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static bool (min)() { return false; } -}; - -} - -#endif // defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 - /** \internal * A base class to disable default copy ctor and copy assignment operator. */ @@ -446,7 +203,7 @@ template struct array_size { enum { value = Dynamic }; }; -template struct array_size::type> { +template struct array_size> { enum { value = T::SizeAtCompileTime }; }; @@ -457,44 +214,50 @@ template struct array_size { enum { value = N }; }; -#if EIGEN_HAS_CXX11 template struct array_size > { enum { value = N }; }; template struct array_size > { enum { value = N }; }; -#endif + /** \internal - * Analogue of the std::size free function. - * It returns the size of the container or view \a x of type \c T + * Analogue of the std::ssize free function. + * It returns the signed size of the container or view \a x of type \c T * * It currently supports: * - any types T defining a member T::size() const * - plain C arrays as T[N] * + * For C++20, this function just forwards to `std::ssize`, or any ADL discoverable `ssize` function. */ -template -EIGEN_CONSTEXPR Index size(const T& x) { return x.size(); } +#if EIGEN_COMP_CXXVER < 20 || EIGEN_GNUC_AT_MOST(9,4) +template +EIGEN_CONSTEXPR auto index_list_size(const T& x) { + using R = std::common_type_t>; + return static_cast(x.size()); +} -template -EIGEN_CONSTEXPR Index size(const T (&) [N]) { return N; } +template +EIGEN_CONSTEXPR std::ptrdiff_t index_list_size(const T (&)[N]) { return N; } +#else +template +EIGEN_CONSTEXPR auto index_list_size(T&& x) { + using std::ssize; + return ssize(std::forward(x)); +} +#endif // EIGEN_COMP_CXXVER /** \internal * Convenient struct to get the result type of a nullary, unary, binary, or * ternary functor. - * - * Pre C++11: - * Supports both a Func::result_type member and templated - * Func::result::type member. - * - * If none of these members is provided, then the type of the first - * argument is returned. - * - * Post C++11: + * + * Pre C++17: * This uses std::result_of. However, note the `type` member removes * const and converts references/pointers to their corresponding value type.
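The `index_list_size` helper introduced just above differs from the old internal `size()` in one important way: it always returns a signed quantity, mirroring `std::ssize`. A usage sketch, assuming `<Eigen/Core>`:

```cpp
#include <Eigen/Core>
#include <vector>

void index_list_size_demo() {
  std::vector<float> v(7);
  int carray[3] = {1, 2, 3};
  auto n1 = Eigen::internal::index_list_size(v);       // 7, as a signed type
  auto n2 = Eigen::internal::index_list_size(carray);  // std::ptrdiff_t(3)
  (void)n1; (void)n2;
}
```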
+ * + * Post C++17: Uses std::invoke_result */ #if EIGEN_HAS_STD_INVOKE_RESULT template struct result_of; @@ -502,152 +265,37 @@ template struct result_of; template struct result_of { typedef typename std::invoke_result::type type1; - typedef typename remove_all::type type; -}; -#elif EIGEN_HAS_STD_RESULT_OF -template struct result_of { - typedef typename std::result_of::type type1; - typedef typename remove_all::type type; -}; -#else -template struct result_of { }; - -struct has_none {int a[1];}; -struct has_std_result_type {int a[2];}; -struct has_tr1_result {int a[3];}; - -template -struct nullary_result_of_select {}; - -template -struct nullary_result_of_select {typedef typename Func::result_type type;}; - -template -struct nullary_result_of_select {typedef typename Func::template result::type type;}; - -template -struct result_of { - template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); - template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); - - // note that the following indirection is needed for gcc-3.3 - enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; - typedef typename nullary_result_of_select::type type; + typedef remove_all_t type; }; -template -struct unary_result_of_select {typedef typename internal::remove_all::type type;}; - -template -struct unary_result_of_select {typedef typename Func::result_type type;}; - -template -struct unary_result_of_select {typedef typename Func::template result::type type;}; - -template -struct result_of { - template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); - template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); - - // note that the following indirection is needed for gcc-3.3 - enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; - typedef typename unary_result_of_select::type type; -}; - -template -struct binary_result_of_select {typedef typename internal::remove_all::type type;}; - -template -struct binary_result_of_select -{typedef typename Func::result_type type;}; - -template -struct binary_result_of_select -{typedef typename Func::template result::type type;}; - -template -struct result_of { - template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); - template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); - - // note that the following indirection is needed for gcc-3.3 - enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; - typedef typename binary_result_of_select::type type; -}; - -template -struct ternary_result_of_select {typedef typename internal::remove_all::type type;}; - -template -struct ternary_result_of_select -{typedef typename Func::result_type type;}; - -template -struct ternary_result_of_select -{typedef typename Func::template result::type type;}; - -template -struct result_of { - template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); - template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); - - // note that the following indirection is needed for gcc-3.3 - enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; - typedef typename ternary_result_of_select::type type; -}; - -#endif - -#if 
EIGEN_HAS_STD_INVOKE_RESULT template struct invoke_result { typedef typename std::invoke_result::type type1; - typedef typename remove_all::type type; -}; -#elif EIGEN_HAS_CXX11 -template -struct invoke_result { - typedef typename result_of::type type1; - typedef typename remove_all::type type; + typedef remove_all_t type; }; #else -template +template struct result_of { + typedef typename std::result_of::type type1; + typedef remove_all_t type; +}; + +template struct invoke_result { - typedef typename result_of::type type1; - typedef typename remove_all::type type; -}; - -template -struct invoke_result { - typedef typename result_of::type type1; - typedef typename remove_all::type type; -}; - -template -struct invoke_result { - typedef typename result_of::type type1; - typedef typename remove_all::type type; -}; - -template -struct invoke_result { - typedef typename result_of::type type1; - typedef typename remove_all::type type; + typedef typename result_of::type type1; + typedef remove_all_t type; }; #endif +// Reduces a sequence of bools to true if all are true, false otherwise. +template +using reduce_all = std::is_same, + std::integer_sequence >; + +// Reduces a sequence of bools to true if any are true, false if all false. +template +using reduce_any = std::integral_constant, std::integer_sequence >::value>; + struct meta_yes { char a[1]; }; struct meta_no { char a[2]; }; @@ -666,7 +314,7 @@ template const T* return_ptr(); template struct has_nullary_operator { - template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr()->operator()())>0)>::type * = 0); + template static meta_yes testFunctor(C const *,std::enable_if_t<(sizeof(return_ptr()->operator()())>0)> * = 0); static meta_no testFunctor(...); enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; @@ -675,7 +323,7 @@ struct has_nullary_operator template struct has_unary_operator { - template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr()->operator()(IndexType(0)))>0)>::type * = 0); + template static meta_yes testFunctor(C const *,std::enable_if_t<(sizeof(return_ptr()->operator()(IndexType(0)))>0)> * = 0); static meta_no testFunctor(...); enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; @@ -684,7 +332,7 @@ struct has_unary_operator template struct has_binary_operator { - template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0); + template static meta_yes testFunctor(C const *,std::enable_if_t<(sizeof(return_ptr()->operator()(IndexType(0),IndexType(0)))>0)> * = 0); static meta_no testFunctor(...); enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; @@ -696,8 +344,7 @@ struct has_binary_operator template Y))) > - // use ?: instead of || just to shut up a stupid gcc 4.3 warning + bool Done = ((SupX - InfX) <= 1 || ((SupX * SupX <= Y) && ((SupX + 1) * (SupX + 1) > Y)))> class meta_sqrt { enum { @@ -743,7 +390,7 @@ template struct scalar_product_traits // FIXME quick workaround around current limitation of result_of // template // struct result_of(ArgType0,ArgType1)> { -// typedef typename scalar_product_traits::type, typename remove_all::type>::ReturnType type; +// typedef typename scalar_product_traits, remove_all_t>::ReturnType type; // }; /** \internal Obtains a POD type suitable to use as storage for an object of a size @@ -758,6 +405,8 @@ struct aligned_storage { } // end namespace internal +template struct NumTraits; + 
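The new `reduce_all`/`reduce_any` aliases added above fold a parameter pack of booleans entirely at compile time by comparing two `std::integer_sequence`s shifted by one element. Expected behavior, assuming `<Eigen/Core>`:

```cpp
#include <Eigen/Core>

static_assert(Eigen::internal::reduce_all<true, true, true>::value, "");
static_assert(!Eigen::internal::reduce_all<true, false>::value, "");
static_assert(Eigen::internal::reduce_any<false, true>::value, "");
static_assert(!Eigen::internal::reduce_any<false, false>::value, "");
```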
namespace numext { #if defined(EIGEN_GPU_COMPILE_PHASE) @@ -766,11 +415,7 @@ template EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = template EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } #endif -#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 -using internal::device::numeric_limits; -#else using std::numeric_limits; -#endif // Integer division with rounding up. // T is assumed to be an integer type with a>=0, and b>0 @@ -794,6 +439,20 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const double& x,const double& y) { return std::equal_to()(x,y); } #endif +/** + * \internal Performs an exact comparison of x to zero, e.g. to decide whether a term can be ignored. + * Use this to bypass -Wfloat-equal warnings when exact zero is what needs to be tested. +*/ +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool is_exactly_zero(const X& x) { return equal_strict(x, typename NumTraits::Literal{0}); } + +/** + * \internal Performs an exact comparison of x to one, e.g. to decide whether a factor needs to be multiplied. + * Use this to bypass -Wfloat-equal warnings when exact one is what needs to be tested. +*/ +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool is_exactly_one(const X& x) { return equal_strict(x, typename NumTraits::Literal{1}); } + template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const X& x,const Y& y) { return x != y; } @@ -807,6 +466,101 @@ bool not_equal_strict(const double& x,const double& y) { return std::not_equal_t } // end namespace numext +namespace internal { + +template +struct is_identically_zero_impl { + static inline bool run(const Scalar& s) { + return numext::is_exactly_zero(s); + } +}; + +template EIGEN_STRONG_INLINE +bool is_identically_zero(const Scalar& s) { return is_identically_zero_impl::run(s); } + +/// \internal Returns true if its argument is of integer or enum type. +/// FIXME this has the same purpose as `is_valid_index_type` in XprHelper.h +template +constexpr bool is_int_or_enum_v = std::is_enum::value || std::is_integral::value; + +/// \internal Gets the minimum of two values which may be integers or enums +template +inline constexpr int plain_enum_min(A a, B b) { + static_assert(is_int_or_enum_v, "Argument a must be an integer or enum"); + static_assert(is_int_or_enum_v, "Argument b must be an integer or enum"); + return ((int) a <= (int) b) ? (int) a : (int) b; +} + +/// \internal Gets the maximum of two values which may be integers or enums +template +inline constexpr int plain_enum_max(A a, B b) { + static_assert(is_int_or_enum_v, "Argument a must be an integer or enum"); + static_assert(is_int_or_enum_v, "Argument b must be an integer or enum"); + return ((int) a >= (int) b) ? (int) a : (int) b; +} + +/** + * \internal + * `min_size_prefer_dynamic` gives the min between compile-time sizes. 0 has absolute priority, followed by 1, + * followed by Dynamic, followed by other finite values. The reason for giving Dynamic the priority over + * finite values is that min(3, Dynamic) should be Dynamic, since that could be anything between 0 and 3.
+ */ +template +inline constexpr int min_size_prefer_dynamic(A a, B b) { + static_assert(is_int_or_enum_v, "Argument a must be an integer or enum"); + static_assert(is_int_or_enum_v, "Argument b must be an integer or enum"); + if ((int) a == 0 || (int) b == 0) return 0; + if ((int) a == 1 || (int) b == 1) return 1; + if ((int) a == Dynamic || (int) b == Dynamic) return Dynamic; + return plain_enum_min(a, b); +} + +/** + * \internal + * min_size_prefer_fixed is a variant of `min_size_prefer_dynamic` comparing MaxSizes. The difference is that finite values + * now have priority over Dynamic, so that min(3, Dynamic) gives 3. Indeed, whatever the actual value is + * (between 0 and 3), it is not more than 3. + */ +template +inline constexpr int min_size_prefer_fixed(A a, B b) { + static_assert(is_int_or_enum_v, "Argument a must be an integer or enum"); + static_assert(is_int_or_enum_v, "Argument b must be an integer or enum"); + if ((int) a == 0 || (int) b == 0) return 0; + if ((int) a == 1 || (int) b == 1) return 1; + if ((int) a == Dynamic && (int) b == Dynamic) return Dynamic; + if ((int) a == Dynamic) return (int) b; + if ((int) b == Dynamic) return (int) a; + return plain_enum_min(a, b); +} + +/// \internal see `min_size_prefer_fixed`. No need for a separate variant for MaxSizes here. +template +inline constexpr int max_size_prefer_dynamic(A a, B b) { + static_assert(is_int_or_enum_v, "Argument a must be an integer or enum"); + static_assert(is_int_or_enum_v, "Argument b must be an integer or enum"); + if ((int) a == Dynamic || (int) b == Dynamic) return Dynamic; + return plain_enum_max(a, b); +} + +/// \internal Calculate logical XOR at compile time +inline constexpr bool logical_xor(bool a, bool b) { + return a != b; +} + +/// \internal Calculate logical IMPLIES at compile time +inline constexpr bool check_implication(bool a, bool b) { + return !a || b; +} + +/// \internal Provide fallback for std::is_constant_evaluated for pre-C++20. +#if EIGEN_COMP_CXXVER >= 20 +using std::is_constant_evaluated; +#else +constexpr bool is_constant_evaluated() { return false; } +#endif + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_META_H diff --git a/libs/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h b/libs/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h index 1ce6fd1..7021e6d 100644 --- a/libs/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/libs/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -1,5 +1,5 @@ #ifdef EIGEN_WARNINGS_DISABLED_2 -// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet! +// "DisableStupidWarnings.h" was included twice recursively: Do not re-enable warnings yet! # undef EIGEN_WARNINGS_DISABLED_2 #elif defined(EIGEN_WARNINGS_DISABLED) @@ -12,18 +12,26 @@ #pragma warning pop #elif defined __clang__ #pragma clang diagnostic pop - #elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) + #elif defined __GNUC__ && !defined(__FUJITSU) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic pop #endif #if defined __NVCC__ -// Don't reenable the diagnostic messages, as it turns out these messages need +// Don't re-enable the diagnostic messages, as it turns out these messages need // to be disabled at the point of the template instantiation (i.e the user code) // otherwise they'll be triggered by nvcc. 
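The compile-time size helpers added to Meta.h above encode the priority rules spelled out in their comments (with `Dynamic` being Eigen's usual -1 sentinel), and being `constexpr` functions they can be checked directly, assuming `<Eigen/Core>`:

```cpp
#include <Eigen/Core>

static_assert(Eigen::internal::min_size_prefer_dynamic(3, Eigen::Dynamic) == Eigen::Dynamic, "");
static_assert(Eigen::internal::min_size_prefer_dynamic(0, Eigen::Dynamic) == 0, "");
static_assert(Eigen::internal::min_size_prefer_fixed(3, Eigen::Dynamic) == 3, "");
static_assert(Eigen::internal::max_size_prefer_dynamic(2, Eigen::Dynamic) == Eigen::Dynamic, "");
static_assert(Eigen::internal::check_implication(false, true), "");  // !a || b
```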
-// #pragma diag_default code_is_unreachable -// #pragma diag_default initialization_not_reachable -// #pragma diag_default 2651 -// #pragma diag_default 2653 +// #define EIGEN_MAKE_PRAGMA(X) _Pragma(#X) +// #if __NVCC_DIAG_PRAGMA_SUPPORT__ +// #define EIGEN_NV_DIAG_DEFAULT(X) EIGEN_MAKE_PRAGMA(nv_diag_default X) +// #else +// #define EIGEN_NV_DIAG_DEFAULT(X) EIGEN_MAKE_PRAGMA(diag_default X) +// #endif +// EIGEN_NV_DIAG_DEFAULT(code_is_unreachable) +// EIGEN_NV_DIAG_DEFAULT(initialization_not_reachable) +// EIGEN_NV_DIAG_DEFAULT(2651) +// EIGEN_NV_DIAG_DEFAULT(2653) +// #undef EIGEN_NV_DIAG_DEFAULT +// #undef EIGEN_MAKE_PRAGMA #endif #endif diff --git a/libs/eigen/Eigen/src/Core/util/ReshapedHelper.h b/libs/eigen/Eigen/src/Core/util/ReshapedHelper.h index 4124321..6daea03 100644 --- a/libs/eigen/Eigen/src/Core/util/ReshapedHelper.h +++ b/libs/eigen/Eigen/src/Core/util/ReshapedHelper.h @@ -11,6 +11,8 @@ #ifndef EIGEN_RESHAPED_HELPER_H #define EIGEN_RESHAPED_HELPER_H +#include "../InternalHeaderCheck.h" + namespace Eigen { enum AutoSize_t { AutoSize }; @@ -39,10 +41,9 @@ inline Index get_runtime_reshape_size(AutoSize_t /*size*/, Index other, Index to return total/other; } -template -struct get_compiletime_reshape_order { - enum { value = Order == AutoOrder ? Flags & RowMajorBit : Order }; -}; +constexpr inline int get_compiletime_reshape_order(int flags, int order) { + return order == AutoOrder ? flags & RowMajorBit : order; +} } diff --git a/libs/eigen/Eigen/src/Core/util/Serializer.h b/libs/eigen/Eigen/src/Core/util/Serializer.h new file mode 100644 index 0000000..cbfc04a --- /dev/null +++ b/libs/eigen/Eigen/src/Core/util/Serializer.h @@ -0,0 +1,220 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2021 The Eigen Team +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SERIALIZER_H +#define EIGEN_SERIALIZER_H + +#include + +// The Serializer class encodes data into a memory buffer so it can be later +// reconstructed. This is mainly used to send objects back-and-forth between +// the CPU and GPU. + +namespace Eigen { + +/** + * Serializes an object to a memory buffer. + * + * Useful for transferring data (e.g. back-and-forth to a device). + */ +template +class Serializer; + +// Specialization for POD types. +template +class Serializer::value + && std::is_standard_layout::value>> { + public: + + /** + * Determines the required size of the serialization buffer for a value. + * + * \param value the value to serialize. + * \return the required size. + */ + EIGEN_DEVICE_FUNC size_t size(const T& value) const { + return sizeof(value); + } + + /** + * Serializes a value to a byte buffer. + * \param dest the destination buffer; if this is nullptr, does nothing. + * \param end the end of the destination buffer. + * \param value the value to serialize. + * \return the next memory address past the end of the serialized data. + */ + EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end, const T& value) { + if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(dest + sizeof(value) > end)) return nullptr; + EIGEN_USING_STD(memcpy) + memcpy(dest, &value, sizeof(value)); + return dest + sizeof(value); + } + + /** + * Deserializes a value from a byte buffer. + * \param src the source buffer; if this is nullptr, does nothing. 
+ * \param end the end of the source buffer. + * \param value the value to populate. + * \return the next unprocessed memory address; nullptr if parsing errors are detected. + */ + EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, T& value) const { + if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(src + sizeof(value) > end)) return nullptr; + EIGEN_USING_STD(memcpy) + memcpy(&value, src, sizeof(value)); + return src + sizeof(value); + } +}; + +// Specialization for DenseBase. +// Serializes [rows, cols, data...]. +template +class Serializer, void> { + public: + typedef typename Derived::Scalar Scalar; + + struct Header { + typename Derived::Index rows; + typename Derived::Index cols; + }; + + EIGEN_DEVICE_FUNC size_t size(const Derived& value) const { + return sizeof(Header) + sizeof(Scalar) * value.size(); + } + + EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end, const Derived& value) { + if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(dest + size(value) > end)) return nullptr; + const size_t header_bytes = sizeof(Header); + const size_t data_bytes = sizeof(Scalar) * value.size(); + Header header = {value.rows(), value.cols()}; + EIGEN_USING_STD(memcpy) + memcpy(dest, &header, header_bytes); + dest += header_bytes; + memcpy(dest, value.data(), data_bytes); + return dest + data_bytes; + } + + EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, Derived& value) const { + if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(src + sizeof(Header) > end)) return nullptr; + const size_t header_bytes = sizeof(Header); + Header header; + EIGEN_USING_STD(memcpy) + memcpy(&header, src, header_bytes); + src += header_bytes; + const size_t data_bytes = sizeof(Scalar) * header.rows * header.cols; + if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr; + value.resize(header.rows, header.cols); + memcpy(value.data(), src, data_bytes); + return src + data_bytes; + } +}; + +template +class Serializer > : public + Serializer > > {}; + +template +class Serializer > : public + Serializer > > {}; + +namespace internal { + +// Recursive serialization implementation helper. +template +struct serialize_impl; + +template +struct serialize_impl { + using Serializer = Eigen::Serializer::type>; + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t serialize_size(const T1& value, const Ts&... args) { + Serializer serializer; + size_t size = serializer.size(value); + return size + serialize_impl::serialize_size(args...); + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + uint8_t* serialize(uint8_t* dest, uint8_t* end, const T1& value, const Ts&... args) { + Serializer serializer; + dest = serializer.serialize(dest, end, value); + return serialize_impl::serialize(dest, end, args...); + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, T1& value, Ts&... args) { + Serializer serializer; + src = serializer.deserialize(src, end, value); + return serialize_impl::deserialize(src, end, args...); + } +}; + +// Base case. 
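Before the recursion bottoms out in the base case below, here is how the new serialization API composes end to end: a hypothetical round trip through a scalar and a matrix, assuming Serializer.h is reachable via `<Eigen/Core>` (buffer handling and names are illustrative only):

```cpp
#include <Eigen/Core>
#include <cstdint>
#include <vector>

void serializer_round_trip() {
  float x = 3.5f;
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(4, 3);

  // 1. Measure, 2. pack, 3. unpack.
  std::vector<std::uint8_t> buffer(Eigen::serialize_size(x, m));
  std::uint8_t* end = buffer.data() + buffer.size();
  Eigen::serialize(buffer.data(), end, x, m);

  float x2 = 0.f;
  Eigen::MatrixXf m2;
  const std::uint8_t* next = Eigen::deserialize(buffer.data(), end, x2, m2);
  // next is nullptr if the buffer was truncated or otherwise malformed.
  (void)next;
}
```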
+template<> +struct serialize_impl<0> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t serialize_size() { return 0; } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + uint8_t* serialize(uint8_t* dest, uint8_t* /*end*/) { return dest; } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const uint8_t* deserialize(const uint8_t* src, const uint8_t* /*end*/) { return src; } +}; + +} // namespace internal + + +/** + * Determine the buffer size required to serialize a set of values. + * + * \param args ... arguments to serialize in sequence. + * \return the total size of the required buffer. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +size_t serialize_size(const Args&... args) { + return internal::serialize_impl::serialize_size(args...); +} + +/** + * Serialize a set of values to the byte buffer. + * + * \param dest output byte buffer; if this is nullptr, does nothing. + * \param end the end of the output byte buffer. + * \param args ... arguments to serialize in sequence. + * \return the next address after all serialized values. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +uint8_t* serialize(uint8_t* dest, uint8_t* end, const Args&... args) { + return internal::serialize_impl::serialize(dest, end, args...); +} + +/** + * Deserialize a set of values from the byte buffer. + * + * \param src input byte buffer; if this is nullptr, does nothing. + * \param end the end of input byte buffer. + * \param args ... arguments to deserialize in sequence. + * \return the next address after all parsed values; nullptr if parsing errors are detected. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, Args&... args) { + return internal::serialize_impl::deserialize(src, end, args...); +} + +} // namespace Eigen + +#endif // EIGEN_SERIALIZER_H diff --git a/libs/eigen/Eigen/src/Core/util/StaticAssert.h b/libs/eigen/Eigen/src/Core/util/StaticAssert.h index c45de59..c938eb8 100644 --- a/libs/eigen/Eigen/src/Core/util/StaticAssert.h +++ b/libs/eigen/Eigen/src/Core/util/StaticAssert.h @@ -16,10 +16,6 @@ * - in EIGEN_STATIC_ASSERT(CONDITION,MSG) the parameter CONDITION must be a compile time boolean * expression, and MSG an enum listed in struct internal::static_assertion * - * - define EIGEN_NO_STATIC_ASSERT to disable them (and save compilation time) - * in that case, the static assertion is converted to the following runtime assert: - * eigen_assert(CONDITION && "MSG") - * * - currently EIGEN_STATIC_ASSERT can only be used in function scope * */ @@ -27,113 +23,11 @@ #ifndef EIGEN_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT - #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600)) - - // if native static_assert is enabled, let's use it - #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); - - #else // not CXX0X - - namespace Eigen { - - namespace internal { - - template - struct static_assertion {}; - - template<> - struct static_assertion - { - enum { - YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1, - YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1, - YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1, - THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1, - THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1, - THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1, - OUT_OF_RANGE_ACCESS=1, - YOU_MADE_A_PROGRAMMING_MISTAKE=1, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1, - EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1, - 
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1, - YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1, - UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1, - THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1, - FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1, - NUMERIC_TYPE_MUST_BE_REAL=1, - COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1, - WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1, - THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1, - INVALID_MATRIX_PRODUCT=1, - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1, - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1, - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1, - THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1, - INVALID_MATRIX_TEMPLATE_PARAMETERS=1, - INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1, - BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1, - THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1, - YOU_ALREADY_SPECIFIED_THIS_STRIDE=1, - INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1, - THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1, - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1, - THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1, - YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1, - YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1, - THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1, - YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1, - THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1, - THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1, - THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1, - YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1, - YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1, - THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1, - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1, - IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1, - STORAGE_LAYOUT_DOES_NOT_MATCH=1, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1, - THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1, - THIS_TYPE_IS_NOT_SUPPORTED=1, - STORAGE_KIND_MUST_MATCH=1, - STORAGE_INDEX_MUST_MATCH=1, - CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, - SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1, - INVALID_TEMPLATE_PARAMETER=1, - GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS=1, - THE_ARRAY_SIZE_SHOULD_EQUAL_WITH_PACKET_SIZE=1 - }; - }; - - } // end namespace internal - - } // end namespace Eigen - - // Specialized implementation for MSVC to avoid "conditional - // expression is constant" warnings. This implementation doesn't - // appear to work under GCC, hence the multiple implementations. 
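The enum machinery and compiler-specific expansions being deleted here existed only so that pre-C++11 compilers would surface a readable identifier in the error output; with C++14 now the baseline, the replacement just below is a one-line `static_assert` that stringifies the message token. A trivial sketch of post-change usage:

```cpp
#include <Eigen/Core>

// EIGEN_STATIC_ASSERT(X, MSG) now expands to: static_assert(X, "MSG");
void static_assert_demo() {
  EIGEN_STATIC_ASSERT(sizeof(float) == 4, THIS_TYPE_IS_NOT_SUPPORTED)
}
```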
- #if EIGEN_COMP_MSVC - - #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \ - {Eigen::internal::static_assertion::MSG;} - - #else - // In some cases clang interprets bool(CONDITION) as function declaration - #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \ - if (Eigen::internal::static_assertion(CONDITION)>::MSG) {} - - #endif - - #endif // not CXX0X +#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); #else // EIGEN_NO_STATIC_ASSERT - #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); +#define EIGEN_STATIC_ASSERT(CONDITION,MSG) #endif // EIGEN_NO_STATIC_ASSERT #endif // EIGEN_STATIC_ASSERT diff --git a/libs/eigen/Eigen/src/Core/util/SymbolicIndex.h b/libs/eigen/Eigen/src/Core/util/SymbolicIndex.h index 354dd9a..3b19185 100644 --- a/libs/eigen/Eigen/src/Core/util/SymbolicIndex.h +++ b/libs/eigen/Eigen/src/Core/util/SymbolicIndex.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SYMBOLIC_INDEX_H #define EIGEN_SYMBOLIC_INDEX_H +#include "../InternalHeaderCheck.h" + namespace Eigen { /** \namespace Eigen::symbolic @@ -30,12 +32,11 @@ namespace Eigen { * // And evaluate it: (c++14) * std::cout << expr.eval(x=6,y=3,z=-13) << "\n"; * - * // In c++98/11, only one symbol per expression is supported for now: - * auto expr98 = (3-x)/2; - * std::cout << expr98.eval(x=6) << "\n"; * \endcode * - * It is currently only used internally to define and manipulate the Eigen::last and Eigen::lastp1 symbols in Eigen::seq and Eigen::seqN. + * It is currently only used internally to define and manipulate the + * Eigen::placeholders::last and Eigen::placeholders::lastp1 symbols in + * Eigen::seq and Eigen::seqN. * */ namespace symbolic { @@ -88,10 +89,8 @@ public: template Index eval(const T& values) const { return derived().eval_impl(values); } -#if EIGEN_HAS_CXX14 template Index eval(Types&&... 
values) const { return derived().eval_impl(std::make_tuple(values...)); } -#endif NegateExpr operator-() const { return NegateExpr(derived()); } @@ -139,34 +138,6 @@ public: friend QuotientExpr >,Derived> operator/(internal::FixedInt, const BaseExpr& b) { return QuotientExpr > ,Derived>(ValueExpr >(),b.derived()); } -#if (!EIGEN_HAS_CXX14) - template - AddExpr > > operator+(internal::FixedInt (*)()) const - { return AddExpr > >(derived(), ValueExpr >()); } - template - AddExpr > > operator-(internal::FixedInt (*)()) const - { return AddExpr > >(derived(), ValueExpr >()); } - template - ProductExpr > > operator*(internal::FixedInt (*)()) const - { return ProductExpr > >(derived(),ValueExpr >()); } - template - QuotientExpr > > operator/(internal::FixedInt (*)()) const - { return QuotientExpr > >(derived(),ValueExpr >()); } - - template - friend AddExpr > > operator+(internal::FixedInt (*)(), const BaseExpr& b) - { return AddExpr > >(b.derived(), ValueExpr >()); } - template - friend AddExpr,ValueExpr > > operator-(internal::FixedInt (*)(), const BaseExpr& b) - { return AddExpr,ValueExpr > >(-b.derived(), ValueExpr >()); } - template - friend ProductExpr >,Derived> operator*(internal::FixedInt (*)(), const BaseExpr& b) - { return ProductExpr >,Derived>(ValueExpr >(),b.derived()); } - template - friend QuotientExpr >,Derived> operator/(internal::FixedInt (*)(), const BaseExpr& b) - { return QuotientExpr > ,Derived>(ValueExpr >(),b.derived()); } -#endif - template AddExpr operator+(const BaseExpr &b) const @@ -228,11 +199,9 @@ public: Index eval_impl(const SymbolValue &values) const { return values.value(); } -#if EIGEN_HAS_CXX14 // C++14 versions suitable for multiple symbols template Index eval_impl(const std::tuple& values) const { return std::get >(values).value(); } -#endif }; template diff --git a/libs/eigen/Eigen/src/Core/util/XprHelper.h b/libs/eigen/Eigen/src/Core/util/XprHelper.h index 71c32b8..b5f91bf 100644 --- a/libs/eigen/Eigen/src/Core/util/XprHelper.h +++ b/libs/eigen/Eigen/src/Core/util/XprHelper.h @@ -11,16 +11,7 @@ #ifndef EIGEN_XPRHELPER_H #define EIGEN_XPRHELPER_H -// just a workaround because GCC seems to not really like empty structs -// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled -// so currently we simply disable this optimization for gcc 4.3 -#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3) - #define EIGEN_EMPTY_STRUCT_CTOR(X) \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {} -#else - #define EIGEN_EMPTY_STRUCT_CTOR(X) -#endif +#include "../InternalHeaderCheck.h" namespace Eigen { @@ -37,15 +28,7 @@ inline IndexDest convert_index(const IndexSrc& idx) { // true if T can be considered as an integral index (i.e., an integral type or enum) template struct is_valid_index_type { - enum { value = -#if EIGEN_HAS_TYPE_TRAITS - internal::is_integral::value || std::is_enum::value -#elif EIGEN_COMP_MSVC - internal::is_integral::value || __is_enum(T) -#else - // without C++11, we use is_convertible to Index instead of is_integral in order to treat enums as Index.
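With C++14 guaranteed, `is_valid_index_type` (whose simplified one-line definition follows) reduces to "integral or enum", so all of these hold, assuming `<Eigen/Core>`:

```cpp
#include <Eigen/Core>

enum class Axis : int { X = 0 };
static_assert(Eigen::internal::is_valid_index_type<int>::value, "");
static_assert(Eigen::internal::is_valid_index_type<Axis>::value, "");
static_assert(!Eigen::internal::is_valid_index_type<float>::value, "");
```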
- internal::is_convertible::value && !internal::is_same::value && !is_same::value -#endif + enum { value = internal::is_integral::value || std::is_enum::value }; }; @@ -119,7 +102,7 @@ class no_assignment_operator template struct promote_index_type { - typedef typename conditional<(sizeof(I1)::type type; + typedef std::conditional_t<(sizeof(I1) type; }; /** \internal If the template parameter Value is Dynamic, this class is just a wrapper around a T variable that @@ -154,7 +137,6 @@ template class variable_if_dynamic template class variable_if_dynamicindex { public: - EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR T value() { return T(Value); } @@ -209,83 +191,68 @@ struct find_best_packet }; #if EIGEN_MAX_STATIC_ALIGN_BYTES>0 -template -struct compute_default_alignment_helper -{ - enum { value = 0 }; -}; - -template -struct compute_default_alignment_helper // Match -{ - enum { value = AlignmentBytes }; -}; - -template -struct compute_default_alignment_helper // Try-half -{ - // current packet too large, try with an half-packet - enum { value = compute_default_alignment_helper::value }; -}; +constexpr inline int compute_default_alignment_helper(int ArrayBytes, int AlignmentBytes) { + if((ArrayBytes % AlignmentBytes) == 0) { + return AlignmentBytes; + } else if (EIGEN_MIN_ALIGN_BYTES -struct compute_default_alignment_helper -{ - enum { value = 0 }; -}; +// This also avoids a division by zero +constexpr inline int compute_default_alignment_helper(int ArrayBytes, int AlignmentBytes) { + EIGEN_UNUSED_VARIABLE(ArrayBytes); + EIGEN_UNUSED_VARIABLE(AlignmentBytes); + return 0; +} #endif template struct compute_default_alignment { - enum { value = compute_default_alignment_helper::value }; + enum { value = compute_default_alignment_helper(Size*sizeof(T), EIGEN_MAX_STATIC_ALIGN_BYTES) }; }; template struct compute_default_alignment { enum { value = EIGEN_MAX_ALIGN_BYTES }; }; -template class make_proper_matrix_type { enum { - IsColVector = _Cols==1 && _Rows!=1, - IsRowVector = _Rows==1 && _Cols!=1, - Options = IsColVector ? (_Options | ColMajor) & ~RowMajor - : IsRowVector ? (_Options | RowMajor) & ~ColMajor - : _Options + IsColVector = Cols_==1 && Rows_!=1, + IsRowVector = Rows_==1 && Cols_!=1, + Options = IsColVector ? (Options_ | ColMajor) & ~RowMajor + : IsRowVector ? (Options_ | RowMajor) & ~ColMajor + : Options_ }; public: - typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type; + typedef Matrix type; }; -template -class compute_matrix_flags -{ - enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 }; - public: - // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<> - // and then propagate this information to the evaluator's flags. - // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage. - enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit }; -}; +constexpr inline unsigned compute_matrix_flags(int Options) { + unsigned row_major_bit = Options&RowMajor ? RowMajorBit : 0; + // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<> + // and then propagate this information to the evaluator's flags. + // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage. 
+ return DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit; +} -template struct size_at_compile_time -{ - enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols }; -}; +constexpr inline int size_at_compile_time(int rows, int cols) { + return (rows==Dynamic || cols==Dynamic) ? Dynamic : rows * cols; +} template struct size_of_xpr_at_compile_time { - enum { ret = size_at_compile_time::RowsAtCompileTime,traits::ColsAtCompileTime>::ret }; + enum { ret = size_at_compile_time(traits::RowsAtCompileTime, traits::ColsAtCompileTime) }; }; /* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type, @@ -303,6 +270,11 @@ template struct plain_matrix_type typedef typename T::PlainObject type; }; +template struct plain_matrix_type +{ + typedef typename T::PlainObject type; +}; + template struct plain_matrix_type_dense { typedef Matrix::Scalar, @@ -349,17 +321,22 @@ template struct eval typedef typename plain_matrix_type::type type; }; -// for matrices, no need to evaluate, just use a const reference to avoid a useless copy -template -struct eval, Dense> +template struct eval { - typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type; + typedef typename plain_matrix_type::type type; }; -template -struct eval, Dense> +// for matrices, no need to evaluate, just use a const reference to avoid a useless copy +template +struct eval, Dense> { - typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type; + typedef const Matrix& type; +}; + +template +struct eval, Dense> +{ + typedef const Array& type; }; @@ -415,28 +392,28 @@ template struct plain_matrix_type_row_major template struct ref_selector { - typedef typename conditional< + typedef std::conditional_t< bool(traits::Flags & NestByRefBit), T const&, const T - >::type type; + > type; - typedef typename conditional< + typedef std::conditional_t< bool(traits::Flags & NestByRefBit), T &, T - >::type non_const_type; + > non_const_type; }; /** \internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */ template struct transfer_constness { - typedef typename conditional< + typedef std::conditional_t< bool(internal::is_const::value), - typename internal::add_const_on_value_type::type, + add_const_on_value_type_t, T2 - >::type type; + > type; }; @@ -469,7 +446,7 @@ template Evaluate = (int(evaluator::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval)) }; - typedef typename conditional::type>::type type; + typedef std::conditional_t::type> type; }; template @@ -509,10 +486,10 @@ struct generic_xpr_base template struct cast_return_type { typedef typename XprType::Scalar CurrentScalarType; - typedef typename remove_all::type _CastType; - typedef typename _CastType::Scalar NewScalarType; - typedef typename conditional::value, - const XprType&,CastType>::type type; + typedef remove_all_t CastType_; + typedef typename CastType_::Scalar NewScalarType; + typedef std::conditional_t::value, + const XprType&,CastType> type; }; template struct promote_storage_type; @@ -587,6 +564,12 @@ template struct product_promote_storage_type struct product_promote_storage_type { typedef Dense ret; }; template struct product_promote_storage_type { typedef Dense ret; }; +template struct product_promote_storage_type { typedef A ret; }; +template struct product_promote_storage_type { typedef B ret; }; +template struct product_promote_storage_type { typedef Dense ret; }; +template struct product_promote_storage_type { typedef 
Dense ret; }; +template struct product_promote_storage_type { typedef Dense ret; }; + template struct product_promote_storage_type { typedef A ret; }; template struct product_promote_storage_type { typedef B ret; }; template struct product_promote_storage_type { typedef Dense ret; }; @@ -603,11 +586,11 @@ struct plain_row_type typedef Array ArrayRowType; - typedef typename conditional< + typedef std::conditional_t< is_same< typename traits::XprKind, MatrixXpr >::value, MatrixRowType, ArrayRowType - >::type type; + > type; }; template @@ -618,27 +601,28 @@ struct plain_col_type typedef Array ArrayColType; - typedef typename conditional< + typedef std::conditional_t< is_same< typename traits::XprKind, MatrixXpr >::value, MatrixColType, ArrayColType - >::type type; + > type; }; template struct plain_diag_type { - enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime), - max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime) + enum { diag_size = internal::min_size_prefer_dynamic(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime), + max_diag_size = min_size_prefer_fixed(ExpressionType::MaxRowsAtCompileTime, + ExpressionType::MaxColsAtCompileTime) }; typedef Matrix MatrixDiagType; typedef Array ArrayDiagType; - typedef typename conditional< + typedef std::conditional_t< is_same< typename traits::XprKind, MatrixXpr >::value, MatrixDiagType, ArrayDiagType - >::type type; + > type; }; template @@ -652,7 +636,7 @@ struct plain_constant_type typedef Matrix::RowsAtCompileTime, traits::ColsAtCompileTime, Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> matrix_type; - typedef CwiseNullaryOp, const typename conditional::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type; + typedef CwiseNullaryOp, const std::conditional_t::XprKind, MatrixXpr >::value, matrix_type, array_type> > type; }; template @@ -692,14 +676,14 @@ struct possibly_same_dense { template EIGEN_DEVICE_FUNC -bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if::value>::type * = 0) +bool is_same_dense(const T1 &mat1, const T2 &mat2, std::enable_if_t::value> * = 0) { return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride()); } template EIGEN_DEVICE_FUNC -bool is_same_dense(const T1 &, const T2 &, typename enable_if::value>::type * = 0) +bool is_same_dense(const T1 &, const T2 &, std::enable_if_t::value> * = 0) { return false; } @@ -721,9 +705,9 @@ struct scalar_div_cost, Vectorized> { template -struct scalar_div_cost::type> { enum { value = 24 }; }; +struct scalar_div_cost> { enum { value = 24 }; }; template -struct scalar_div_cost::type> { enum { value = 21 }; }; +struct scalar_div_cost> { enum { value = 21 }; }; #ifdef EIGEN_DEBUG_ASSIGN @@ -812,12 +796,12 @@ struct ScalarBinaryOpTraits }; template -struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, BinaryOp> +struct ScalarBinaryOpTraits::IsComplex,T>>::Real, BinaryOp> { typedef T ReturnType; }; template -struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, T, BinaryOp> +struct ScalarBinaryOpTraits::IsComplex,T>>::Real, T, BinaryOp> { typedef T ReturnType; }; diff --git a/libs/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/libs/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h index 081e918..1cfc0ca 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ 
b/libs/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h @@ -14,6 +14,8 @@ #include "./ComplexSchur.h" +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \eigenvalues_module \ingroup Eigenvalues_Module @@ -23,7 +25,7 @@ namespace Eigen { * * \brief Computes eigenvalues and eigenvectors of general complex matrices * - * \tparam _MatrixType the type of the matrix of which we are + * \tparam MatrixType_ the type of the matrix of which we are * computing the eigendecomposition; this is expected to be an * instantiation of the Matrix class template. * @@ -42,12 +44,12 @@ namespace Eigen { * * \sa class EigenSolver, class SelfAdjointEigenSolver */ -template class ComplexEigenSolver +template class ComplexEigenSolver { public: - /** \brief Synonym for the template parameter \p _MatrixType. */ - typedef _MatrixType MatrixType; + /** \brief Synonym for the template parameter \p MatrixType_. */ + typedef MatrixType_ MatrixType; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, @@ -236,12 +238,9 @@ template class ComplexEigenSolver } protected: - - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } - + + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + EigenvectorType m_eivec; EigenvalueType m_eivalues; ComplexSchur m_schur; @@ -260,8 +259,6 @@ template ComplexEigenSolver& ComplexEigenSolver::compute(const EigenBase& matrix, bool computeEigenvectors) { - check_template_parameters(); - // this code is inspired from Jampack eigen_assert(matrix.cols() == matrix.rows()); diff --git a/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur.h b/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur.h index fc71468..80a28fb 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +++ b/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur.h @@ -14,6 +14,8 @@ #include "./HessenbergDecomposition.h" +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -27,7 +29,7 @@ template struct complex_schur_reduce_to_hes * * \brief Performs a complex Schur decomposition of a real or complex square matrix * - * \tparam _MatrixType the type of the matrix of which we are + * \tparam MatrixType_ the type of the matrix of which we are * computing the Schur decomposition; this is expected to be an * instantiation of the Matrix class template. * @@ -48,10 +50,10 @@ template struct complex_schur_reduce_to_hes * * \sa class RealSchur, class EigenSolver, class ComplexEigenSolver */ -template class ComplexSchur +template class ComplexSchur { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -60,12 +62,12 @@ template class ComplexSchur MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - /** \brief Scalar type for matrices of type \p _MatrixType. */ + /** \brief Scalar type for matrices of type \p MatrixType_. */ typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 - /** \brief Complex scalar type for \p _MatrixType. + /** \brief Complex scalar type for \p MatrixType_. * * This is \c std::complex if #Scalar is real (e.g., * \c float or \c double) and just \c Scalar if #Scalar is @@ -76,7 +78,7 @@ template class ComplexSchur /** \brief Type for the matrices in the Schur decomposition. * * This is a square matrix with entries of type #ComplexScalar. - * The size is the same as the size of \p _MatrixType. 
+ * The size is the same as the size of \p MatrixType_. */ typedef Matrix ComplexMatrixType; @@ -259,7 +261,7 @@ template class ComplexSchur friend struct internal::complex_schur_reduce_to_hessenberg::IsComplex>; }; -/** If m_matT(i+1,i) is neglegible in floating point arithmetic +/** If m_matT(i+1,i) is negligible in floating point arithmetic * compared to m_matT(i,i) and m_matT(j,j), then set it to zero and * return true, else return false. */ template @@ -306,7 +308,7 @@ typename ComplexSchur::ComplexScalar ComplexSchur::compu // In this case, det==0, and all we have to do is checking that eival2_norm!=0 if(eival1_norm > eival2_norm) eival2 = det / eival1; - else if(eival2_norm!=RealScalar(0)) + else if(!numext::is_exactly_zero(eival2_norm)) eival1 = det / eival2; // choose the eigenvalue closest to the bottom entry of the diagonal diff --git a/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h b/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h index 4980a3e..144eb2a 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +++ b/libs/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h @@ -33,6 +33,8 @@ #ifndef EIGEN_COMPLEX_SCHUR_LAPACKE_H #define EIGEN_COMPLEX_SCHUR_LAPACKE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ diff --git a/libs/eigen/Eigen/src/Eigenvalues/EigenSolver.h b/libs/eigen/Eigen/src/Eigenvalues/EigenSolver.h index 572b29e..f6ff140 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/EigenSolver.h +++ b/libs/eigen/Eigen/src/Eigenvalues/EigenSolver.h @@ -13,6 +13,8 @@ #include "./RealSchur.h" +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \eigenvalues_module \ingroup Eigenvalues_Module @@ -22,7 +24,7 @@ namespace Eigen { * * \brief Computes eigenvalues and eigenvectors of general matrices * - * \tparam _MatrixType the type of the matrix of which we are computing the + * \tparam MatrixType_ the type of the matrix of which we are computing the * eigendecomposition; this is expected to be an instantiation of the Matrix * class template. Currently, only real matrices are supported. * @@ -61,12 +63,12 @@ namespace Eigen { * * \sa MatrixBase::eigenvalues(), class ComplexEigenSolver, class SelfAdjointEigenSolver */ -template class EigenSolver +template class EigenSolver { public: - /** \brief Synonym for the template parameter \p _MatrixType. */ - typedef _MatrixType MatrixType; + /** \brief Synonym for the template parameter \p MatrixType_. */ + typedef MatrixType_ MatrixType; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, diff --git a/libs/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/libs/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 87d789b..d62c411 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/libs/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -14,6 +14,8 @@ #include "./RealQZ.h" +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \eigenvalues_module \ingroup Eigenvalues_Module @@ -23,7 +25,7 @@ namespace Eigen { * * \brief Computes the generalized eigenvalues and eigenvectors of a pair of general matrices * - * \tparam _MatrixType the type of the matrices of which we are computing the + * \tparam MatrixType_ the type of the matrices of which we are computing the * eigen-decomposition; this is expected to be an instantiation of the Matrix * class template. Currently, only real matrices are supported. 
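A pattern from the ComplexEigenSolver hunk above recurs throughout this diff: the runtime-invoked check_template_parameters() shim is deleted and EIGEN_STATIC_ASSERT_NON_INTEGER moves to class scope, so a bad instantiation is rejected as soon as the class template is instantiated instead of when compute() first runs. A reduced sketch of the before/after, with a plain static_assert standing in for Eigen's macro and an illustrative class name:

#include <type_traits>

template <typename Scalar>
class solver_sketch {
  // Before: compute() called a static check_template_parameters() containing
  // EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar). After: the check sits at class
  // scope and fires on instantiation of solver_sketch<int> itself.
  static_assert(!std::is_integral<Scalar>::value,
                "scalar type must be a non-integer type");
};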
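Likewise recurring: exact scalar comparisons such as eival2_norm != RealScalar(0) become calls to numext::is_exactly_zero. That helper is internal, undocumented API, so the sketch below only assumes it behaves as a named exact-equality test (x == T(0)); it is deliberately not an epsilon comparison, and the ComplexSchur shift code above relies on that to guard a literal division:

#include <Eigen/Core>

double safe_quotient(double det, double denom) {
  // Same meaning as denom == 0.0, but the intent (an exact test, on purpose)
  // is explicit and greppable.
  if (Eigen::numext::is_exactly_zero(denom))
    return 0.0;  // caller falls back to the other eigenvalue, as in computeShift()
  return det / denom;
}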
* @@ -55,12 +57,12 @@ namespace Eigen { * * \sa MatrixBase::eigenvalues(), class ComplexEigenSolver, class SelfAdjointEigenSolver */ -template class GeneralizedEigenSolver +template class GeneralizedEigenSolver { public: - /** \brief Synonym for the template parameter \p _MatrixType. */ - typedef _MatrixType MatrixType; + /** \brief Synonym for the template parameter \p MatrixType_. */ + typedef MatrixType_ MatrixType; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, @@ -119,8 +121,8 @@ template class GeneralizedEigenSolver : m_eivec(), m_alphas(), m_betas(), - m_valuesOkay(false), - m_vectorsOkay(false), + m_computeEigenvectors(false), + m_isInitialized(false), m_realQZ() {} @@ -134,8 +136,8 @@ template class GeneralizedEigenSolver : m_eivec(size, size), m_alphas(size), m_betas(size), - m_valuesOkay(false), - m_vectorsOkay(false), + m_computeEigenvectors(false), + m_isInitialized(false), m_realQZ(size), m_tmp(size) {} @@ -156,8 +158,8 @@ template class GeneralizedEigenSolver : m_eivec(A.rows(), A.cols()), m_alphas(A.cols()), m_betas(A.cols()), - m_valuesOkay(false), - m_vectorsOkay(false), + m_computeEigenvectors(false), + m_isInitialized(false), m_realQZ(A.cols()), m_tmp(A.cols()) { @@ -177,7 +179,8 @@ template class GeneralizedEigenSolver * \sa eigenvalues() */ EigenvectorsType eigenvectors() const { - eigen_assert(m_vectorsOkay && "Eigenvectors for GeneralizedEigenSolver were not calculated."); + eigen_assert(info() == Success && "GeneralizedEigenSolver failed to compute eigenvectors"); + eigen_assert(m_computeEigenvectors && "Eigenvectors for GeneralizedEigenSolver were not calculated"); return m_eivec; } @@ -201,7 +204,7 @@ template class GeneralizedEigenSolver */ EigenvalueType eigenvalues() const { - eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized."); + eigen_assert(info() == Success && "GeneralizedEigenSolver failed to compute eigenvalues."); return EigenvalueType(m_alphas,m_betas); } @@ -210,9 +213,9 @@ template class GeneralizedEigenSolver * This vector permits to reconstruct the j-th eigenvalues as alphas(i)/betas(j). * * \sa betas(), eigenvalues() */ - ComplexVectorType alphas() const + const ComplexVectorType& alphas() const { - eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized."); + eigen_assert(info() == Success && "GeneralizedEigenSolver failed to compute alphas."); return m_alphas; } @@ -221,9 +224,9 @@ template class GeneralizedEigenSolver * This vector permits to reconstruct the j-th eigenvalues as alphas(i)/betas(j). 
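With m_valuesOkay/m_vectorsOkay replaced by m_isInitialized plus m_computeEigenvectors, the accessors now assert on info() == Success, so the intended calling sequence is: compute (or construct with matrices), check info(), then read results. A minimal usage sketch against the public API:

#include <Eigen/Eigenvalues>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(4, 4);

  // The constructor runs the real QZ decomposition immediately.
  Eigen::GeneralizedEigenSolver<Eigen::MatrixXd> ges(A, B, /*computeEigenvectors=*/true);
  if (ges.info() != Eigen::Success) return 1;  // QZ iteration did not converge

  // eigenvalues()(i) is alphas()(i) / betas()(i); betas can contain zeros.
  std::cout << ges.eigenvalues().transpose() << "\n";
  std::cout << ges.eigenvectors() << "\n";
}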
* * \sa alphas(), eigenvalues() */ - VectorType betas() const + const VectorType& betas() const { - eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized."); + eigen_assert(info() == Success && "GeneralizedEigenSolver failed to compute betas."); return m_betas; } @@ -254,7 +257,7 @@ template class GeneralizedEigenSolver ComputationInfo info() const { - eigen_assert(m_valuesOkay && "EigenSolver is not initialized."); + eigen_assert(m_isInitialized && "EigenSolver is not initialized."); return m_realQZ.info(); } @@ -267,17 +270,15 @@ template class GeneralizedEigenSolver } protected: - - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); - } - + + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL) + EigenvectorsType m_eivec; ComplexVectorType m_alphas; VectorType m_betas; - bool m_valuesOkay, m_vectorsOkay; + bool m_computeEigenvectors; + bool m_isInitialized; RealQZ m_realQZ; ComplexVectorType m_tmp; }; @@ -286,14 +287,10 @@ template GeneralizedEigenSolver& GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors) { - check_template_parameters(); - using std::sqrt; using std::abs; eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows()); Index size = A.cols(); - m_valuesOkay = false; - m_vectorsOkay = false; // Reduce to generalized real Schur form: // A = Q S Z and B = Q T Z m_realQZ.compute(A, B, computeEigenvectors); @@ -406,10 +403,9 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp i += 2; } } - - m_valuesOkay = true; - m_vectorsOkay = computeEigenvectors; } + m_computeEigenvectors = computeEigenvectors; + m_isInitialized = true; return *this; } diff --git a/libs/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h b/libs/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h index d0f9091..dab66ca 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +++ b/libs/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h @@ -13,6 +13,8 @@ #include "./Tridiagonalization.h" +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \eigenvalues_module \ingroup Eigenvalues_Module @@ -22,7 +24,7 @@ namespace Eigen { * * \brief Computes eigenvalues and eigenvectors of the generalized selfadjoint eigen problem * - * \tparam _MatrixType the type of the matrix of which we are computing the + * \tparam MatrixType_ the type of the matrix of which we are computing the * eigendecomposition; this is expected to be an instantiation of the Matrix * class template. * @@ -44,19 +46,19 @@ namespace Eigen { * * \sa class SelfAdjointEigenSolver, class EigenSolver, class ComplexEigenSolver */ -template -class GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver<_MatrixType> +template +class GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver { - typedef SelfAdjointEigenSolver<_MatrixType> Base; + typedef SelfAdjointEigenSolver Base; public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; /** \brief Default constructor for fixed-size matrices. * * The default constructor is useful in cases in which the user intends to * perform decompositions via compute(). 
This constructor - * can only be used if \p _MatrixType is a fixed-size matrix; use + * can only be used if \p MatrixType_ is a fixed-size matrix; use * GeneralizedSelfAdjointEigenSolver(Index) for dynamic-size matrices. */ GeneralizedSelfAdjointEigenSolver() : Base() {} diff --git a/libs/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/libs/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h index 1f21139..fafab99 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +++ b/libs/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h @@ -11,6 +11,8 @@ #ifndef EIGEN_HESSENBERGDECOMPOSITION_H #define EIGEN_HESSENBERGDECOMPOSITION_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -31,7 +33,7 @@ struct traits > * * \brief Reduces a square matrix to Hessenberg form by an orthogonal similarity transformation * - * \tparam _MatrixType the type of the matrix of which we are computing the Hessenberg decomposition + * \tparam MatrixType_ the type of the matrix of which we are computing the Hessenberg decomposition * * This class performs an Hessenberg decomposition of a matrix \f$ A \f$. In * the real case, the Hessenberg decomposition consists of an orthogonal @@ -54,12 +56,12 @@ struct traits > * * \sa class ComplexSchur, class Tridiagonalization, \ref QR_Module "QR Module" */ -template class HessenbergDecomposition +template class HessenbergDecomposition { public: - /** \brief Synonym for the template parameter \p _MatrixType. */ - typedef _MatrixType MatrixType; + /** \brief Synonym for the template parameter \p MatrixType_. */ + typedef MatrixType_ MatrixType; enum { Size = MatrixType::RowsAtCompileTime, @@ -82,7 +84,7 @@ template class HessenbergDecomposition typedef Matrix CoeffVectorType; /** \brief Return type of matrixQ() */ - typedef HouseholderSequence::type> HouseholderSequenceType; + typedef HouseholderSequence> HouseholderSequenceType; typedef internal::HessenbergDecompositionMatrixHReturnType MatrixHReturnType; diff --git a/libs/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h b/libs/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h new file mode 100644 index 0000000..374cbd4 --- /dev/null +++ b/libs/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_EIGENVALUES_MODULE_H +#error "Please include Eigen/Eigenvalues instead of including headers inside the src directory directly." 
+#endif diff --git a/libs/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/libs/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h index 66e5a3d..c8df260 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +++ b/libs/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h @@ -11,6 +11,8 @@ #ifndef EIGEN_MATRIXBASEEIGENVALUES_H #define EIGEN_MATRIXBASEEIGENVALUES_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Eigenvalues/RealQZ.h b/libs/eigen/Eigen/src/Eigenvalues/RealQZ.h index 5091301..545918f 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/RealQZ.h +++ b/libs/eigen/Eigen/src/Eigenvalues/RealQZ.h @@ -10,6 +10,8 @@ #ifndef EIGEN_REAL_QZ_H #define EIGEN_REAL_QZ_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \eigenvalues_module \ingroup Eigenvalues_Module @@ -19,7 +21,7 @@ namespace Eigen { * * \brief Performs a real QZ decomposition of a pair of square matrices * - * \tparam _MatrixType the type of the matrix of which we are computing the + * \tparam MatrixType_ the type of the matrix of which we are computing the * real QZ decomposition; this is expected to be an instantiation of the * Matrix class template. * @@ -54,10 +56,10 @@ namespace Eigen { * \sa class RealSchur, class ComplexSchur, class EigenSolver, class ComplexEigenSolver */ - template class RealQZ + template class RealQZ { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -237,7 +239,7 @@ namespace Eigen { for (Index i=dim-1; i>=j+2; i--) { JRs G; // kill S(i,j) - if(m_S.coeff(i,j) != 0) + if(!numext::is_exactly_zero(m_S.coeff(i, j))) { G.makeGivens(m_S.coeff(i-1,j), m_S.coeff(i,j), &m_S.coeffRef(i-1, j)); m_S.coeffRef(i,j) = Scalar(0.0); @@ -248,7 +250,7 @@ namespace Eigen { m_Q.applyOnTheRight(i-1,i,G); } // kill T(i,i-1) - if(m_T.coeff(i,i-1)!=Scalar(0)) + if(!numext::is_exactly_zero(m_T.coeff(i, i - 1))) { G.makeGivens(m_T.coeff(i,i), m_T.coeff(i,i-1), &m_T.coeffRef(i,i)); m_T.coeffRef(i,i-1) = Scalar(0.0); @@ -286,7 +288,7 @@ namespace Eigen { while (res > 0) { Scalar s = abs(m_S.coeff(res-1,res-1)) + abs(m_S.coeff(res,res)); - if (s == Scalar(0.0)) + if (numext::is_exactly_zero(s)) s = m_normOfS; if (abs(m_S.coeff(res,res-1)) < NumTraits::epsilon() * s) break; @@ -316,7 +318,7 @@ namespace Eigen { using std::abs; using std::sqrt; const Index dim=m_S.cols(); - if (abs(m_S.coeff(i+1,i))==Scalar(0)) + if (numext::is_exactly_zero(abs(m_S.coeff(i + 1, i)))) return; Index j = findSmallDiagEntry(i,i+1); if (j==i-1) @@ -627,7 +629,7 @@ namespace Eigen { { for(Index i=0; i j_left, j_right; internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); diff --git a/libs/eigen/Eigen/src/Eigenvalues/RealSchur.h b/libs/eigen/Eigen/src/Eigenvalues/RealSchur.h index 7304ef3..9817666 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/RealSchur.h +++ b/libs/eigen/Eigen/src/Eigenvalues/RealSchur.h @@ -13,6 +13,8 @@ #include "./HessenbergDecomposition.h" +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \eigenvalues_module \ingroup Eigenvalues_Module @@ -22,7 +24,7 @@ namespace Eigen { * * \brief Performs a real Schur decomposition of a square matrix * - * \tparam _MatrixType the type of the matrix of which we are computing the + * \tparam MatrixType_ the type of the matrix of which we are computing the * real Schur decomposition; this is expected to be an instantiation of the * Matrix class template. 
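The new InternalHeaderCheck.h shown above is included at the top of every header in the module and turns a direct src/ include into an immediate compile error, since EIGEN_EIGENVALUES_MODULE_H is only defined by the module header itself. The effect, sketched:

#include <Eigen/Eigenvalues>  // defines EIGEN_EIGENVALUES_MODULE_H, then pulls in src/

// Wrong, and now a hard error instead of a subtle breakage:
//   #include <Eigen/src/Eigenvalues/RealSchur.h>
//   -> "Please include Eigen/Eigenvalues instead of including headers inside
//      the src directory directly."

Eigen::RealSchur<Eigen::MatrixXd> schur(4);  // fine: RealSchur.h arrived via the module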
* @@ -51,10 +53,10 @@ namespace Eigen { * * \sa class ComplexSchur, class EigenSolver, class ComplexEigenSolver */ -template class RealSchur +template class RealSchur { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -312,7 +314,7 @@ RealSchur& RealSchur::computeFromHessenberg(const HessMa Scalar considerAsZero = numext::maxi( norm * numext::abs2(NumTraits::epsilon()), (std::numeric_limits::min)() ); - if(norm!=Scalar(0)) + if(!numext::is_exactly_zero(norm)) { while (iu >= 0) { @@ -515,7 +517,7 @@ inline void RealSchur::performFrancisQRStep(Index il, Index im, Inde Matrix ess; v.makeHouseholder(ess, tau, beta); - if (beta != Scalar(0)) // if v is not zero + if (!numext::is_exactly_zero(beta)) // if v is not zero { if (firstIteration && k > il) m_matT.coeffRef(k,k-1) = -m_matT.coeff(k,k-1); @@ -535,7 +537,7 @@ inline void RealSchur::performFrancisQRStep(Index il, Index im, Inde Matrix ess; v.makeHouseholder(ess, tau, beta); - if (beta != Scalar(0)) // if v is not zero + if (!numext::is_exactly_zero(beta)) // if v is not zero { m_matT.coeffRef(iu-1, iu-2) = beta; m_matT.block(iu-1, iu-1, 2, size-iu+1).applyHouseholderOnTheLeft(ess, tau, workspace); diff --git a/libs/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h b/libs/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h index 2c22517..0a6ed21 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +++ b/libs/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h @@ -33,6 +33,8 @@ #ifndef EIGEN_REAL_SCHUR_LAPACKE_H #define EIGEN_REAL_SCHUR_LAPACKE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ diff --git a/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 1469236..d196ec0 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -13,9 +13,11 @@ #include "./Tridiagonalization.h" +#include "./InternalHeaderCheck.h" + namespace Eigen { -template +template class GeneralizedSelfAdjointEigenSolver; namespace internal { @@ -33,7 +35,7 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag * * \brief Computes eigenvalues and eigenvectors of selfadjoint matrices * - * \tparam _MatrixType the type of the matrix of which we are computing the + * \tparam MatrixType_ the type of the matrix of which we are computing the * eigendecomposition; this is expected to be an instantiation of the Matrix * class template. * @@ -73,11 +75,11 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag * * \sa MatrixBase::eigenvalues(), class EigenSolver, class ComplexEigenSolver */ -template class SelfAdjointEigenSolver +template class SelfAdjointEigenSolver { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; enum { Size = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -85,13 +87,13 @@ template class SelfAdjointEigenSolver MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - /** \brief Scalar type for matrices of type \p _MatrixType. */ + /** \brief Scalar type for matrices of type \p MatrixType_. 
*/ typedef typename MatrixType::Scalar Scalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef Matrix EigenvectorsType; - /** \brief Real scalar type for \p _MatrixType. + /** \brief Real scalar type for \p MatrixType_. * * This is just \c Scalar if #Scalar is real (e.g., \c float or * \c double), and the type of the real part of \c Scalar if #Scalar is @@ -104,7 +106,7 @@ template class SelfAdjointEigenSolver /** \brief Type for vector of eigenvalues as returned by eigenvalues(). * * This is a column vector with entries of type #RealScalar. - * The length of the vector is the size of \p _MatrixType. + * The length of the vector is the size of \p MatrixType_. */ typedef typename internal::plain_col_type::type RealVectorType; typedef Tridiagonalization TridiagonalizationType; @@ -114,7 +116,7 @@ template class SelfAdjointEigenSolver * * The default constructor is useful in cases in which the user intends to * perform decompositions via compute(). This constructor - * can only be used if \p _MatrixType is a fixed-size matrix; use + * can only be used if \p MatrixType_ is a fixed-size matrix; use * SelfAdjointEigenSolver(Index) for dynamic-size matrices. * * Example: \include SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp @@ -372,12 +374,8 @@ template class SelfAdjointEigenSolver static const int m_maxIterations = 30; protected: - static EIGEN_DEVICE_FUNC - void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } - + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + EigenvectorsType m_eivec; RealVectorType m_eivalues; typename TridiagonalizationType::SubDiagonalType m_subdiag; @@ -419,10 +417,8 @@ EIGEN_DEVICE_FUNC SelfAdjointEigenSolver& SelfAdjointEigenSolver ::compute(const EigenBase& a_matrix, int options) { - check_template_parameters(); - const InputType &matrix(a_matrix.derived()); - + EIGEN_USING_STD(abs); eigen_assert(matrix.cols() == matrix.rows()); eigen_assert((options&~(EigVecMask|GenEigMask))==0 @@ -451,7 +447,7 @@ SelfAdjointEigenSolver& SelfAdjointEigenSolver // map the matrix coefficients to [-1:1] to avoid over- and underflow. mat = matrix.template triangularView(); RealScalar scale = mat.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); + if(numext::is_exactly_zero(scale)) scale = RealScalar(1); mat.template triangularView() /= scale; m_subdiag.resize(n-1); m_hcoeffs.resize(n-1); @@ -530,7 +526,7 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag } // find the largest unreduced block at the end of the matrix. - while (end>0 && subdiag[end-1]==RealScalar(0)) + while (end>0 && numext::is_exactly_zero(subdiag[end - 1])) { end--; } @@ -542,7 +538,7 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag if(iter > maxIterations * n) break; start = end - 1; - while (start>0 && subdiag[start-1]!=0) + while (start>0 && !numext::is_exactly_zero(subdiag[start - 1])) start--; internal::tridiagonal_qr_step(diag.data(), subdiag.data(), start, end, computeEigenvectors ? eivec.data() : (Scalar*)0, n); @@ -847,12 +843,12 @@ static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index sta // RealScalar mu = diag[end] - e2 / (td + (td>0 ? 
1 : -1) * sqrt(td*td + e2)); // This explain the following, somewhat more complicated, version: RealScalar mu = diag[end]; - if(td==RealScalar(0)) { + if(numext::is_exactly_zero(td)) { mu -= numext::abs(e); - } else if (e != RealScalar(0)) { + } else if (!numext::is_exactly_zero(e)) { const RealScalar e2 = numext::abs2(e); const RealScalar h = numext::hypot(td,e); - if(e2 == RealScalar(0)) { + if(numext::is_exactly_zero(e2)) { mu -= e / ((td + (td>RealScalar(0) ? h : -h)) / e); } else { mu -= e2 / (td + (td>RealScalar(0) ? h : -h)); @@ -863,7 +859,7 @@ static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index sta RealScalar z = subdiag[start]; // If z ever becomes zero, the Givens rotation will be the identity and // z will stay zero for all future iterations. - for (Index k = start; k < end && z != RealScalar(0); ++k) + for (Index k = start; k < end && !numext::is_exactly_zero(z); ++k) { JacobiRotation rot; rot.makeGivens(x, z); diff --git a/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h b/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h index b0c947d..b24de67 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +++ b/libs/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h @@ -33,6 +33,8 @@ #ifndef EIGEN_SAEIGENSOLVER_LAPACKE_H #define EIGEN_SAEIGENSOLVER_LAPACKE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ diff --git a/libs/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h b/libs/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h index 674c92a..9b002fe 100644 --- a/libs/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +++ b/libs/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h @@ -11,6 +11,8 @@ #ifndef EIGEN_TRIDIAGONALIZATION_H #define EIGEN_TRIDIAGONALIZATION_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -36,7 +38,7 @@ void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs); * * \brief Tridiagonal decomposition of a selfadjoint matrix * - * \tparam _MatrixType the type of the matrix of which we are computing the + * \tparam MatrixType_ the type of the matrix of which we are computing the * tridiagonal decomposition; this is expected to be an instantiation of the * Matrix class template. * @@ -61,12 +63,12 @@ void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs); * * \sa class HessenbergDecomposition, class SelfAdjointEigenSolver */ -template class Tridiagonalization +template class Tridiagonalization { public: - /** \brief Synonym for the template parameter \p _MatrixType. */ - typedef _MatrixType MatrixType; + /** \brief Synonym for the template parameter \p MatrixType_. 
*/ + typedef MatrixType_ MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; @@ -83,21 +85,21 @@ template class Tridiagonalization typedef Matrix CoeffVectorType; typedef typename internal::plain_col_type::type DiagonalType; typedef Matrix SubDiagonalType; - typedef typename internal::remove_all::type MatrixTypeRealView; + typedef internal::remove_all_t MatrixTypeRealView; typedef internal::TridiagonalizationMatrixTReturnType MatrixTReturnType; - typedef typename internal::conditional::IsComplex, - typename internal::add_const_on_value_type::RealReturnType>::type, + typedef std::conditional_t::IsComplex, + internal::add_const_on_value_type_t::RealReturnType>, const Diagonal - >::type DiagonalReturnType; + > DiagonalReturnType; - typedef typename internal::conditional::IsComplex, - typename internal::add_const_on_value_type::RealReturnType>::type, + typedef std::conditional_t::IsComplex, + internal::add_const_on_value_type_t::RealReturnType>, const Diagonal - >::type SubDiagonalReturnType; + > SubDiagonalReturnType; /** \brief Return type of matrixQ() */ - typedef HouseholderSequence::type> HouseholderSequenceType; + typedef HouseholderSequence> HouseholderSequenceType; /** \brief Default constructor. * @@ -440,9 +442,8 @@ void tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonal template struct tridiagonalization_inplace_selector { - typedef typename Tridiagonalization::CoeffVectorType CoeffVectorType; typedef typename Tridiagonalization::HouseholderSequenceType HouseholderSequenceType; - template + template static EIGEN_DEVICE_FUNC void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, CoeffVectorType& hCoeffs, bool extractQ) { diff --git a/libs/eigen/Eigen/src/Geometry/AlignedBox.h b/libs/eigen/Eigen/src/Geometry/AlignedBox.h index 55a9d0a..a824817 100644 --- a/libs/eigen/Eigen/src/Geometry/AlignedBox.h +++ b/libs/eigen/Eigen/src/Geometry/AlignedBox.h @@ -46,6 +46,8 @@ #ifndef EIGEN_ALIGNEDBOX_H #define EIGEN_ALIGNEDBOX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -55,20 +57,20 @@ namespace Eigen { * * \brief An axis aligned box * - * \tparam _Scalar the type of the scalar coefficients - * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. + * \tparam Scalar_ the type of the scalar coefficients + * \tparam AmbientDim_ the dimension of the ambient space, can be a compile time value or Dynamic. * * This class represents an axis aligned box as a pair of the minimal and maximal corners. * \warning The result of most methods is undefined when applied to an empty box. You can check for empty boxes using isEmpty(). 
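The typedef churn in the Tridiagonalization hunk above is one mechanical modernization applied across the diff: Eigen's pre-C++11 metafunctions (internal::conditional<...>::type, remove_all<...>::type, add_const_on_value_type<...>::type) give way to C++14 alias templates with identical meaning and far less typename noise. A reduced before/after sketch with illustrative names:

#include <type_traits>

template <bool IsComplex, typename RealView, typename PlainDiag>
struct diagonal_return_sketch {
  // Before: typedef typename internal::conditional<IsComplex,
  //             RealView, PlainDiag>::type type;
  using type = std::conditional_t<IsComplex, RealView, PlainDiag>;
};

static_assert(std::is_same<diagonal_return_sketch<true, const double&, int>::type,
                           const double&>::value,
              "complex scalars select the real-part view");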
* \sa alignedboxtypedefs */ -template +template class AlignedBox { public: -EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) - enum { AmbientDimAtCompileTime = _AmbientDim }; - typedef _Scalar Scalar; +EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar_,AmbientDim_) + enum { AmbientDimAtCompileTime = AmbientDim_ }; + typedef Scalar_ Scalar; typedef NumTraits ScalarTraits; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef typename ScalarTraits::Real RealScalar; @@ -181,7 +183,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) */ EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const { - EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE); + EIGEN_STATIC_ASSERT(AmbientDim_ <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE); VectorType res; diff --git a/libs/eigen/Eigen/src/Geometry/AngleAxis.h b/libs/eigen/Eigen/src/Geometry/AngleAxis.h index 78328b6..c23a908 100644 --- a/libs/eigen/Eigen/src/Geometry/AngleAxis.h +++ b/libs/eigen/Eigen/src/Geometry/AngleAxis.h @@ -10,6 +10,8 @@ #ifndef EIGEN_ANGLEAXIS_H #define EIGEN_ANGLEAXIS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -18,7 +20,7 @@ namespace Eigen { * * \brief Represents a 3D rotation as a rotation angle around an arbitrary 3D axis * - * \param _Scalar the scalar type, i.e., the type of the coefficients. + * \param Scalar_ the scalar type, i.e., the type of the coefficients. * * \warning When setting up an AngleAxis object, the axis vector \b must \b be \b normalized. * @@ -39,16 +41,16 @@ namespace Eigen { */ namespace internal { -template struct traits > +template struct traits > { - typedef _Scalar Scalar; + typedef Scalar_ Scalar; }; } -template -class AngleAxis : public RotationBase,3> +template +class AngleAxis : public RotationBase,3> { - typedef RotationBase,3> Base; + typedef RotationBase,3> Base; public: @@ -56,7 +58,7 @@ public: enum { Dim = 3 }; /** the scalar type of the coefficients */ - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Matrix Matrix3; typedef Matrix Vector3; typedef Quaternion QuaternionType; diff --git a/libs/eigen/Eigen/src/Geometry/EulerAngles.h b/libs/eigen/Eigen/src/Geometry/EulerAngles.h index 19b734c..2b99960 100644 --- a/libs/eigen/Eigen/src/Geometry/EulerAngles.h +++ b/libs/eigen/Eigen/src/Geometry/EulerAngles.h @@ -10,6 +10,8 @@ #ifndef EIGEN_EULERANGLES_H #define EIGEN_EULERANGLES_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module diff --git a/libs/eigen/Eigen/src/Geometry/Homogeneous.h b/libs/eigen/Eigen/src/Geometry/Homogeneous.h index 94083ac..538cf83 100644 --- a/libs/eigen/Eigen/src/Geometry/Homogeneous.h +++ b/libs/eigen/Eigen/src/Geometry/Homogeneous.h @@ -10,6 +10,8 @@ #ifndef EIGEN_HOMOGENEOUS_H #define EIGEN_HOMOGENEOUS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -35,7 +37,7 @@ struct traits > { typedef typename traits::StorageKind StorageKind; typedef typename ref_selector::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; enum { RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ? int(MatrixType::RowsAtCompileTime) + 1 : Dynamic, @@ -45,7 +47,7 @@ struct traits > ColsAtCompileTime = Direction==Horizontal ? 
ColsPlusOne : MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, - TmpFlags = _MatrixTypeNested::Flags & HereditaryBits, + TmpFlags = MatrixTypeNested_::Flags & HereditaryBits, Flags = ColsAtCompileTime==1 ? (TmpFlags & ~RowMajorBit) : RowsAtCompileTime==1 ? (TmpFlags | RowMajorBit) : TmpFlags @@ -57,13 +59,13 @@ template struct homogeneous_right_product_impl } // end namespace internal -template class Homogeneous - : public MatrixBase >, internal::no_assignment_operator +template class Homogeneous + : public MatrixBase >, internal::no_assignment_operator { public: typedef MatrixType NestedExpression; - enum { Direction = _Direction }; + enum { Direction = Direction_ }; typedef MatrixBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous) @@ -225,7 +227,7 @@ template struct take_matrix_for_product > { typedef Transform TransformType; - typedef typename internal::add_const::type type; + typedef std::add_const_t type; EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); } }; @@ -241,8 +243,8 @@ template struct traits,Lhs> > { typedef typename take_matrix_for_product::type LhsMatrixType; - typedef typename remove_all::type MatrixTypeCleaned; - typedef typename remove_all::type LhsMatrixTypeCleaned; + typedef remove_all_t MatrixTypeCleaned; + typedef remove_all_t LhsMatrixTypeCleaned; typedef typename make_proper_matrix_type< typename traits::Scalar, LhsMatrixTypeCleaned::RowsAtCompileTime, @@ -257,8 +259,8 @@ struct homogeneous_left_product_impl,Lhs> : public ReturnByValue,Lhs> > { typedef typename traits::LhsMatrixType LhsMatrixType; - typedef typename remove_all::type LhsMatrixTypeCleaned; - typedef typename remove_all::type LhsMatrixTypeNested; + typedef remove_all_t LhsMatrixTypeCleaned; + typedef remove_all_t LhsMatrixTypeNested; EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs) : m_lhs(take_matrix_for_product::run(lhs)), m_rhs(rhs) @@ -299,7 +301,7 @@ template struct homogeneous_right_product_impl,Rhs> : public ReturnByValue,Rhs> > { - typedef typename remove_all::type RhsNested; + typedef remove_all_t RhsNested; EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) {} @@ -343,7 +345,7 @@ struct unary_evaluator, IndexBased> EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : Base(), m_temp(op) { - ::new (static_cast(this)) Base(m_temp); + internal::construct_at(this, m_temp); } protected: @@ -402,7 +404,7 @@ struct homogeneous_right_product_refactoring_helper Rows = Lhs::RowsAtCompileTime }; typedef typename Rhs::template ConstNRowsBlockXpr::Type LinearBlockConst; - typedef typename remove_const::type LinearBlock; + typedef std::remove_const_t LinearBlock; typedef typename Rhs::ConstRowXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; @@ -455,7 +457,7 @@ struct homogeneous_left_product_refactoring_helper Cols = Rhs::ColsAtCompileTime }; typedef typename Lhs::template ConstNColsBlockXpr::Type LinearBlockConst; - typedef typename remove_const::type LinearBlock; + typedef std::remove_const_t LinearBlock; typedef typename Lhs::ConstColXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; diff --git a/libs/eigen/Eigen/src/Geometry/Hyperplane.h b/libs/eigen/Eigen/src/Geometry/Hyperplane.h index cebe035..ad6aae9 100644 --- a/libs/eigen/Eigen/src/Geometry/Hyperplane.h +++ b/libs/eigen/Eigen/src/Geometry/Hyperplane.h @@ -11,6 +11,8 @@ 
#ifndef EIGEN_HYPERPLANE_H #define EIGEN_HYPERPLANE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -22,24 +24,24 @@ namespace Eigen { * A hyperplane is an affine subspace of dimension n-1 in a space of dimension n. * For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane. * - * \tparam _Scalar the scalar type, i.e., the type of the coefficients - * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. - * Notice that the dimension of the hyperplane is _AmbientDim-1. + * \tparam Scalar_ the scalar type, i.e., the type of the coefficients + * \tparam AmbientDim_ the dimension of the ambient space, can be a compile time value or Dynamic. + * Notice that the dimension of the hyperplane is AmbientDim_-1. * * This class represents an hyperplane as the zero set of the implicit equation * \f$ n \cdot x + d = 0 \f$ where \f$ n \f$ is a unit normal vector of the plane (linear part) * and \f$ d \f$ is the distance (offset) to the origin. */ -template +template class Hyperplane { public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar_,AmbientDim_==Dynamic ? Dynamic : AmbientDim_+1) enum { - AmbientDimAtCompileTime = _AmbientDim, - Options = _Options + AmbientDimAtCompileTime = AmbientDim_, + Options = Options_ }; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef typename NumTraits::Real RealScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef Matrix VectorType; @@ -106,7 +108,7 @@ public: if(norm <= v0.norm() * v1.norm() * NumTraits::epsilon()) { Matrix m; m << v0.transpose(), v1.transpose(); - JacobiSVD > svd(m, ComputeFullV); + JacobiSVD, ComputeFullV> svd(m); result.normal() = svd.matrixV().col(2); } else diff --git a/libs/eigen/Eigen/src/Geometry/InternalHeaderCheck.h b/libs/eigen/Eigen/src/Geometry/InternalHeaderCheck.h new file mode 100644 index 0000000..a1159a3 --- /dev/null +++ b/libs/eigen/Eigen/src/Geometry/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_GEOMETRY_MODULE_H +#error "Please include Eigen/Geometry instead of including headers inside the src directory directly." 
+#endif diff --git a/libs/eigen/Eigen/src/Geometry/OrthoMethods.h b/libs/eigen/Eigen/src/Geometry/OrthoMethods.h index 524aebe..fbf020d 100644 --- a/libs/eigen/Eigen/src/Geometry/OrthoMethods.h +++ b/libs/eigen/Eigen/src/Geometry/OrthoMethods.h @@ -11,41 +11,87 @@ #ifndef EIGEN_ORTHOMETHODS_H #define EIGEN_ORTHOMETHODS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { +namespace internal { + +// Vector3 version (default) +template +struct cross_impl +{ + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef Matrix::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> return_type; + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + return_type run(const MatrixBase& first, const MatrixBase& second) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3) + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3) + + // Note that there is no need for an expression here since the compiler + // optimize such a small temporary very well (even within a complex expression) + typename internal::nested_eval::type lhs(first.derived()); + typename internal::nested_eval::type rhs(second.derived()); + return return_type( + numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)), + numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)), + numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0)) + ); + } +}; + +// Vector2 version +template +struct cross_impl +{ + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef Scalar return_type; + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + return_type run(const MatrixBase& first, const MatrixBase& second) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,2); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,2); + typename internal::nested_eval::type lhs(first.derived()); + typename internal::nested_eval::type rhs(second.derived()); + return numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0)); + } +}; + +} // end namespace internal + /** \geometry_module \ingroup Geometry_Module * - * \returns the cross product of \c *this and \a other + * \returns the cross product of \c *this and \a other. This is either a scalar for size-2 vectors or a size-3 vector for size-3 vectors. * - * Here is a very good explanation of cross-product: http://xkcd.com/199/ + * This method is implemented for two different cases: between vectors of fixed size 2 and between vectors of fixed size 3. * - * With complex numbers, the cross product is implemented as - * \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} - \mathbf{b} \times \mathbf{c})\f$ + * For vectors of size 3, the output is simply the traditional cross product. + * + * For vectors of size 2, the output is a scalar. + * Given vectors \f$ v = \begin{bmatrix} v_1 & v_2 \end{bmatrix} \f$ and \f$ w = \begin{bmatrix} w_1 & w_2 \end{bmatrix} \f$, + * the result is simply \f$ v\times w = \overline{v_1 w_2 - v_2 w_1} = \text{conj}\left|\begin{smallmatrix} v_1 & w_1 \\ v_2 & w_2 \end{smallmatrix}\right| \f$; + * or, to put it differently, it is the third coordinate of the cross product of \f$ \begin{bmatrix} v_1 & v_2 & v_3 \end{bmatrix} \f$ and \f$ \begin{bmatrix} w_1 & w_2 & w_3 \end{bmatrix} \f$. + * For real-valued inputs, the result can be interpreted as the signed area of a parallelogram spanned by the two vectors. 
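The new size-2 overload of cross() documented above returns a scalar rather than a vector; for real inputs it is the signed parallelogram area. A quick check against the specialization added in this hunk:

#include <Eigen/Geometry>
#include <iostream>

int main() {
  Eigen::Vector2d v(1.0, 0.0), w(0.0, 2.0);
  double area = v.cross(w);     // v1*w2 - v2*w1 = 2 (counter-clockwise pair)
  double swapped = w.cross(v);  // -2: swapping the operands flips the sign
  std::cout << area << " " << swapped << "\n";
}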
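One more migration worth flagging, visible in the Hyperplane::Through hunk above and again in Quaternion::setFromTwoVectors below: JacobiSVD's computation options move from a runtime constructor argument to a template parameter, so the requested factors are fixed in the type. Old versus new, assuming this tree's option-carrying JacobiSVD:

#include <Eigen/SVD>

Eigen::Vector3d null_direction(const Eigen::Matrix<double, 2, 3>& m) {
  // Old: Eigen::JacobiSVD<Eigen::Matrix<double, 2, 3>> svd(m, Eigen::ComputeFullV);
  Eigen::JacobiSVD<Eigen::Matrix<double, 2, 3>, Eigen::ComputeFullV> svd(m);
  return svd.matrixV().col(2);  // direction of the smallest singular value
}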
+ * + * \note With complex numbers, the cross product is implemented as + * \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} + \mathbf{b} \times \mathbf{c})\f$ * * \sa MatrixBase::cross3() */ template template -#ifndef EIGEN_PARSED_BY_DOXYGEN EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -typename MatrixBase::template cross_product_return_type::type +#ifndef EIGEN_PARSED_BY_DOXYGEN +typename internal::cross_impl::return_type #else -typename MatrixBase::PlainObject +inline std::conditional_t #endif MatrixBase::cross(const MatrixBase& other) const { - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3) - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3) - - // Note that there is no need for an expression here since the compiler - // optimize such a small temporary very well (even within a complex expression) - typename internal::nested_eval::type lhs(derived()); - typename internal::nested_eval::type rhs(other.derived()); - return typename cross_product_return_type::type( - numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)), - numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)), - numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0)) - ); + return internal::cross_impl::run(*this, other); } namespace internal { @@ -91,8 +137,8 @@ MatrixBase::cross3(const MatrixBase& other) const OtherDerivedNested rhs(other.derived()); return internal::cross3_impl::type, - typename internal::remove_all::type>::run(lhs,rhs); + internal::remove_all_t, + internal::remove_all_t>::run(lhs,rhs); } /** \geometry_module \ingroup Geometry_Module diff --git a/libs/eigen/Eigen/src/Geometry/ParametrizedLine.h b/libs/eigen/Eigen/src/Geometry/ParametrizedLine.h index 584f500..7576922 100644 --- a/libs/eigen/Eigen/src/Geometry/ParametrizedLine.h +++ b/libs/eigen/Eigen/src/Geometry/ParametrizedLine.h @@ -11,6 +11,8 @@ #ifndef EIGEN_PARAMETRIZEDLINE_H #define EIGEN_PARAMETRIZEDLINE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -23,19 +25,19 @@ namespace Eigen { * direction vector \f$ \mathbf{d} \f$ such that the line corresponds to * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$. * - * \tparam _Scalar the scalar type, i.e., the type of the coefficients - * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. + * \tparam Scalar_ the scalar type, i.e., the type of the coefficients + * \tparam AmbientDim_ the dimension of the ambient space, can be a compile time value or Dynamic. */ -template +template class ParametrizedLine { public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar_,AmbientDim_) enum { - AmbientDimAtCompileTime = _AmbientDim, - Options = _Options + AmbientDimAtCompileTime = AmbientDim_, + Options = Options_ }; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef typename NumTraits::Real RealScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef Matrix VectorType; @@ -59,7 +61,7 @@ public: : m_origin(origin), m_direction(direction) {} template - EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane); + EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane& hyperplane); /** Constructs a parametrized line going from \a p0 to \a p1. 
*/ EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1) @@ -96,13 +98,13 @@ public: EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const; template - EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; - + EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane& hyperplane) const; + template - EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; - + EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane& hyperplane) const; + template - EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; + EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane& hyperplane) const; /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. * @@ -178,9 +180,9 @@ protected: * * \warning the ambient space must have dimension 2 such that the hyperplane actually describes a line */ -template +template template -EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane) +EIGEN_DEVICE_FUNC inline ParametrizedLine::ParametrizedLine(const Hyperplane& hyperplane) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) direction() = hyperplane.normal().unitOrthogonal(); @@ -189,18 +191,18 @@ EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::Parame /** \returns the point at \a t along this line */ -template -EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType -ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const +template +EIGEN_DEVICE_FUNC inline typename ParametrizedLine::VectorType +ParametrizedLine::pointAt(const Scalar_& t) const { return origin() + (direction()*t); } /** \returns the parameter value of the intersection between \c *this and the given \a hyperplane */ -template +template template -EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const +EIGEN_DEVICE_FUNC inline Scalar_ ParametrizedLine::intersectionParameter(const Hyperplane& hyperplane) const { return -(hyperplane.offset()+hyperplane.normal().dot(origin())) / hyperplane.normal().dot(direction()); @@ -210,19 +212,19 @@ EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options> /** \deprecated use intersectionParameter() * \returns the parameter value of the intersection between \c *this and the given \a hyperplane */ -template +template template -EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const +EIGEN_DEVICE_FUNC inline Scalar_ ParametrizedLine::intersection(const Hyperplane& hyperplane) const { return intersectionParameter(hyperplane); } /** \returns the point of the intersection between \c *this and the given hyperplane */ -template +template template -EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType -ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const +EIGEN_DEVICE_FUNC inline typename ParametrizedLine::VectorType +ParametrizedLine::intersectionPoint(const Hyperplane& hyperplane) 
const { return pointAt(intersectionParameter(hyperplane)); } diff --git a/libs/eigen/Eigen/src/Geometry/Quaternion.h b/libs/eigen/Eigen/src/Geometry/Quaternion.h index 3259e59..0aca4c4 100644 --- a/libs/eigen/Eigen/src/Geometry/Quaternion.h +++ b/libs/eigen/Eigen/src/Geometry/Quaternion.h @@ -10,6 +10,8 @@ #ifndef EIGEN_QUATERNION_H #define EIGEN_QUATERNION_H +#include "./InternalHeaderCheck.h" + namespace Eigen { @@ -44,8 +46,8 @@ class QuaternionBase : public RotationBase typedef typename NumTraits::Real RealScalar; typedef typename internal::traits::Coefficients Coefficients; typedef typename Coefficients::CoeffReturnType CoeffReturnType; - typedef typename internal::conditional::Flags&LvalueBit), - Scalar&, CoeffReturnType>::type NonConstCoeffReturnType; + typedef std::conditional_t::Flags&LvalueBit), + Scalar&, CoeffReturnType> NonConstCoeffReturnType; enum { @@ -198,14 +200,14 @@ class QuaternionBase : public RotationBase template EIGEN_DEVICE_FUNC inline - typename internal::enable_if::value,const Derived&>::type cast() const + std::enable_if_t::value,const Derived&> cast() const { return derived(); } template EIGEN_DEVICE_FUNC inline - typename internal::enable_if::value,Quaternion >::type cast() const + std::enable_if_t::value,Quaternion > cast() const { return Quaternion(coeffs().template cast()); } @@ -236,8 +238,8 @@ protected: * * \brief The quaternion class used to represent 3D orientations and rotations * - * \tparam _Scalar the scalar type, i.e., the type of the coefficients - * \tparam _Options controls the memory alignment of the coefficients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign. + * \tparam Scalar_ the scalar type, i.e., the type of the coefficients + * \tparam Options_ controls the memory alignment of the coefficients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign. * * This class represents a quaternion \f$ w+xi+yj+zk \f$ that is a convenient representation of * orientations and rotations of objects in three dimensions. Compared to other representations @@ -256,12 +258,12 @@ protected: */ namespace internal { -template -struct traits > +template +struct traits > { - typedef Quaternion<_Scalar,_Options> PlainObject; - typedef _Scalar Scalar; - typedef Matrix<_Scalar,4,1,_Options> Coefficients; + typedef Quaternion PlainObject; + typedef Scalar_ Scalar; + typedef Matrix Coefficients; enum{ Alignment = internal::traits::Alignment, Flags = LvalueBit @@ -269,14 +271,14 @@ struct traits > }; } -template -class Quaternion : public QuaternionBase > +template +class Quaternion : public QuaternionBase > { public: - typedef QuaternionBase > Base; + typedef QuaternionBase > Base; enum { NeedsAlignment = internal::traits::Alignment>0 }; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Quaternion) using Base::operator*=; @@ -307,7 +309,7 @@ public: /** Constructs and initializes a quaternion from either: * - a rotation matrix expression, - * - a 4D vector expression representing quaternion coefficients. + * - a 4D vector expression representing quaternion coefficients in the order [\c x, \c y, \c z, \c w]. */ template EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase& other) { *this = other; } @@ -317,7 +319,6 @@ public: EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion& other) { m_coeffs = other.coeffs().template cast(); } -#if EIGEN_HAS_RVALUE_REFERENCES // We define a copy constructor, which means we don't get an implicit move constructor or assignment operator. 
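The doc clarification in the Quaternion hunk above deserves an example, because the two ways of building a Quaternion disagree on ordering: the four-scalar constructor takes w first, while the 4D-vector constructor reads storage order [x, y, z, w]:

#include <Eigen/Geometry>

// Both quaternions below are the identity rotation.
Eigen::Quaterniond a(1.0, 0.0, 0.0, 0.0);           // scalar ctor: (w, x, y, z)
Eigen::Quaterniond b(Eigen::Vector4d(0, 0, 0, 1));  // vector ctor: [x, y, z, w]
// a.coeffs() and b.coeffs() both read 0 0 0 1 (x y z w storage order).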
/** Default move constructor */ EIGEN_DEVICE_FUNC inline Quaternion(Quaternion&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) @@ -330,7 +331,6 @@ public: m_coeffs = std::move(other.coeffs()); return *this; } -#endif EIGEN_DEVICE_FUNC static Quaternion UnitRandom(); @@ -341,20 +341,17 @@ public: EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;} EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment)) - + #ifdef EIGEN_QUATERNION_PLUGIN # include EIGEN_QUATERNION_PLUGIN #endif protected: Coefficients m_coeffs; - + #ifndef EIGEN_PARSED_BY_DOXYGEN - static EIGEN_STRONG_INLINE void _check_template_params() - { - EIGEN_STATIC_ASSERT( (_Options & DontAlign) == _Options, - INVALID_MATRIX_TEMPLATE_PARAMETERS) - } + EIGEN_STATIC_ASSERT( (Options_ & DontAlign) == Options_, + INVALID_MATRIX_TEMPLATE_PARAMETERS) #endif }; @@ -370,19 +367,19 @@ typedef Quaternion Quaterniond; ***************************************************************************/ namespace internal { - template - struct traits, _Options> > : traits > + template + struct traits, Options_> > : traits > { - typedef Map, _Options> Coefficients; + typedef Map, Options_> Coefficients; }; } namespace internal { - template - struct traits, _Options> > : traits > + template + struct traits, Options_> > : traits > { - typedef Map, _Options> Coefficients; - typedef traits > TraitsBase; + typedef Map, Options_> Coefficients; + typedef traits > TraitsBase; enum { Flags = TraitsBase::Flags & ~LvalueBit }; @@ -392,22 +389,22 @@ namespace internal { /** \ingroup Geometry_Module * \brief Quaternion expression mapping a constant memory buffer * - * \tparam _Scalar the type of the Quaternion coefficients - * \tparam _Options see class Map + * \tparam Scalar_ the type of the Quaternion coefficients + * \tparam Options_ see class Map * * This is a specialization of class Map for Quaternion. This class allows to view * a 4 scalar memory buffer as an Eigen's Quaternion object. * * \sa class Map, class Quaternion, class QuaternionBase */ -template -class Map, _Options > - : public QuaternionBase, _Options> > +template +class Map, Options_ > + : public QuaternionBase, Options_> > { public: - typedef QuaternionBase, _Options> > Base; + typedef QuaternionBase, Options_> > Base; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef typename internal::traits::Coefficients Coefficients; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) using Base::operator*=; @@ -417,7 +414,7 @@ class Map, _Options > * The pointer \a coeffs must reference the four coefficients of Quaternion in the following order: * \code *coeffs == {x, y, z, w} \endcode * - * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */ + * If the template parameter Options_ is set to #Aligned, then the pointer coeffs must be aligned. */ EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {} EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;} @@ -429,22 +426,22 @@ class Map, _Options > /** \ingroup Geometry_Module * \brief Expression of a quaternion from a memory buffer * - * \tparam _Scalar the type of the Quaternion coefficients - * \tparam _Options see class Map + * \tparam Scalar_ the type of the Quaternion coefficients + * \tparam Options_ see class Map * * This is a specialization of class Map for Quaternion. This class allows to view * a 4 scalar memory buffer as an Eigen's Quaternion object. 
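As described above, the Map specializations view a raw 4-scalar buffer as a quaternion without copying; the buffer must hold x, y, z, w in that order, and must be suitably aligned if Options_ is Aligned. A minimal sketch:

#include <Eigen/Geometry>

double buf[4] = {0.0, 0.0, 0.0, 1.0};          // x, y, z, w -> identity
Eigen::Map<Eigen::Quaterniond> q(buf);         // mutable view: writes land in buf
Eigen::Map<const Eigen::Quaterniond> qc(buf);  // read-only view (LvalueBit cleared)
Eigen::Vector3d r = q * Eigen::Vector3d::UnitX();  // rotate through the view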
* * \sa class Map, class Quaternion, class QuaternionBase */ -template -class Map, _Options > - : public QuaternionBase, _Options> > +template +class Map, Options_ > + : public QuaternionBase, Options_> > { public: - typedef QuaternionBase, _Options> > Base; + typedef QuaternionBase, Options_> > Base; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef typename internal::traits::Coefficients Coefficients; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) using Base::operator*=; @@ -454,7 +451,7 @@ class Map, _Options > * The pointer \a coeffs must reference the four coefficients of Quaternion in the following order: * \code *coeffs == {x, y, z, w} \endcode * - * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */ + * If the template parameter Options_ is set to #Aligned, then the pointer coeffs must be aligned. */ EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {} EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; } @@ -654,7 +651,7 @@ EIGEN_DEVICE_FUNC inline Derived& QuaternionBase::setFromTwoVectors(con { c = numext::maxi(c,Scalar(-1)); Matrix m; m << v0.transpose(), v1.transpose(); - JacobiSVD > svd(m, ComputeFullV); + JacobiSVD, ComputeFullV> svd(m); Vector3 axis = svd.matrixV().col(2); Scalar w2 = (Scalar(1)+c)*Scalar(0.5); diff --git a/libs/eigen/Eigen/src/Geometry/Rotation2D.h b/libs/eigen/Eigen/src/Geometry/Rotation2D.h index d0bd575..aa7f863 100644 --- a/libs/eigen/Eigen/src/Geometry/Rotation2D.h +++ b/libs/eigen/Eigen/src/Geometry/Rotation2D.h @@ -10,6 +10,8 @@ #ifndef EIGEN_ROTATION2D_H #define EIGEN_ROTATION2D_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -18,7 +20,7 @@ namespace Eigen { * * \brief Represents a rotation/orientation in a 2 dimensional space. * - * \tparam _Scalar the scalar type, i.e., the type of the coefficients + * \tparam Scalar_ the scalar type, i.e., the type of the coefficients * * This class is equivalent to a single scalar representing a counter clock wise rotation * as a single angle in radian. 
It provides some additional features such as the automatic @@ -31,16 +33,16 @@ namespace Eigen { namespace internal { -template struct traits > +template struct traits > { - typedef _Scalar Scalar; + typedef Scalar_ Scalar; }; } // end namespace internal -template -class Rotation2D : public RotationBase,2> +template +class Rotation2D : public RotationBase,2> { - typedef RotationBase,2> Base; + typedef RotationBase,2> Base; public: @@ -48,7 +50,7 @@ public: enum { Dim = 2 }; /** the scalar type of the coefficients */ - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Matrix Vector2; typedef Matrix Matrix2; diff --git a/libs/eigen/Eigen/src/Geometry/RotationBase.h b/libs/eigen/Eigen/src/Geometry/RotationBase.h index f0ee0bd..f21277f 100644 --- a/libs/eigen/Eigen/src/Geometry/RotationBase.h +++ b/libs/eigen/Eigen/src/Geometry/RotationBase.h @@ -10,6 +10,8 @@ #ifndef EIGEN_ROTATIONBASE_H #define EIGEN_ROTATIONBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { // forward declaration @@ -23,13 +25,13 @@ struct rotation_base_generic_product_selector; * \brief Common base class for compact rotation representations * * \tparam Derived is the derived type, i.e., a rotation type - * \tparam _Dim the dimension of the space + * \tparam Dim_ the dimension of the space */ -template +template class RotationBase { public: - enum { Dim = _Dim }; + enum { Dim = Dim_ }; /** the scalar type of the coefficients */ typedef typename internal::traits::Scalar Scalar; @@ -135,9 +137,9 @@ struct rotation_base_generic_product_selector +template template -EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> +EIGEN_DEVICE_FUNC Matrix ::Matrix(const RotationBase& r) { EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim)) @@ -148,10 +150,10 @@ EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> * * \brief Set a Dim x Dim rotation matrix from the rotation \a r */ -template +template template -EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>& -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> +EIGEN_DEVICE_FUNC Matrix& +Matrix ::operator=(const RotationBase& r) { EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim)) diff --git a/libs/eigen/Eigen/src/Geometry/Scaling.h b/libs/eigen/Eigen/src/Geometry/Scaling.h index d352f1f..8bcdce6 100644 --- a/libs/eigen/Eigen/src/Geometry/Scaling.h +++ b/libs/eigen/Eigen/src/Geometry/Scaling.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SCALING_H #define EIGEN_SCALING_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -18,7 +20,7 @@ namespace Eigen { * * \brief Represents a generic uniform scaling transformation * - * \tparam _Scalar the scalar type, i.e., the type of the coefficients. + * \tparam Scalar_ the scalar type, i.e., the type of the coefficients. * * This class represent a uniform scaling transformation. 
It is the return * type of Scaling(Scalar), and most of the time this is the only way it @@ -45,12 +47,12 @@ namespace internal }; } -template +template class UniformScaling { public: /** the scalar type of the coefficients */ - typedef _Scalar Scalar; + typedef Scalar_ Scalar; protected: @@ -160,6 +162,11 @@ template inline const DiagonalWrapper Scaling(const MatrixBase& coeffs) { return coeffs.asDiagonal(); } +/** Constructs an axis aligned scaling expression from vector \a coeffs when passed as an rvalue reference */ +template +inline typename DiagonalWrapper::PlainObject Scaling(MatrixBase&& coeffs) +{ return typename DiagonalWrapper::PlainObject(std::move(coeffs.derived())); } + /** \deprecated */ typedef DiagonalMatrix AlignedScaling2f; /** \deprecated */ diff --git a/libs/eigen/Eigen/src/Geometry/Transform.h b/libs/eigen/Eigen/src/Geometry/Transform.h index 52b8c2a..fd0ae7e 100644 --- a/libs/eigen/Eigen/src/Geometry/Transform.h +++ b/libs/eigen/Eigen/src/Geometry/Transform.h @@ -12,6 +12,8 @@ #ifndef EIGEN_TRANSFORM_H #define EIGEN_TRANSFORM_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -63,15 +65,15 @@ struct transform_construct_from_matrix; template struct transform_take_affine_part; -template -struct traits > +template +struct traits > { - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Eigen::Index StorageIndex; typedef Dense StorageKind; enum { - Dim1 = _Dim==Dynamic ? _Dim : _Dim + 1, - RowsAtCompileTime = _Mode==Projective ? Dim1 : _Dim, + Dim1 = Dim_==Dynamic ? Dim_ : Dim_ + 1, + RowsAtCompileTime = Mode_==Projective ? Dim1 : Dim_, ColsAtCompileTime = Dim1, MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, @@ -89,9 +91,9 @@ template struct transform_make_affine; * * \brief Represents an homogeneous transformation in a N dimensional space * - * \tparam _Scalar the scalar type, i.e., the type of the coefficients - * \tparam _Dim the dimension of the space - * \tparam _Mode the type of the transformation. Can be: + * \tparam Scalar_ the scalar type, i.e., the type of the coefficients + * \tparam Dim_ the dimension of the space + * \tparam Mode_ the type of the transformation. Can be: * - #Affine: the transformation is stored as a (Dim+1)^2 matrix, * where the last row is assumed to be [0 ... 0 1]. * - #AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix. @@ -100,7 +102,7 @@ template struct transform_make_affine; * - #Isometry: same as #Affine with the additional assumption that * the linear part represents a rotation. This assumption is exploited * to speed up some functions such as inverse() and rotation(). - * \tparam _Options has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor. + * \tparam Options_ has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor. * These Options are passed directly to the underlying matrix type. * * The homography is internally represented and stored by a matrix which @@ -200,20 +202,20 @@ template struct transform_make_affine; * * \sa class Matrix, class Quaternion */ -template +template class Transform { public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim==Dynamic ? Dynamic : (_Dim+1)*(_Dim+1)) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar_,Dim_==Dynamic ? 
Dynamic : (Dim_+1)*(Dim_+1)) enum { - Mode = _Mode, - Options = _Options, - Dim = _Dim, ///< space dimension in which the transformation holds - HDim = _Dim+1, ///< size of a respective homogeneous vector + Mode = Mode_, + Options = Options_, + Dim = Dim_, ///< space dimension in which the transformation holds + HDim = Dim_+1, ///< size of a respective homogeneous vector Rows = int(Mode)==(AffineCompact) ? Dim : HDim }; /** the scalar type of the coefficients */ - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Eigen::Index StorageIndex; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 /** type of the matrix used to represent the transformation */ @@ -227,13 +229,13 @@ public: /** type of read reference to the linear part of the transformation */ typedef const Block ConstLinearPart; /** type of read/write reference to the affine part of the transformation */ - typedef typename internal::conditional >::type AffinePart; + Block > AffinePart; /** type of read reference to the affine part of the transformation */ - typedef typename internal::conditional >::type ConstAffinePart; + const Block > ConstAffinePart; /** type of a vector */ typedef Matrix VectorType; /** type of a read/write reference to the translation part of the rotation */ @@ -317,12 +319,12 @@ public: check_template_params(); // prevent conversions as: // Affine | AffineCompact | Isometry = Projective - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Projective), Mode==int(Projective)), + EIGEN_STATIC_ASSERT(internal::check_implication(OtherMode==int(Projective), Mode==int(Projective)), YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION) // prevent conversions as: // Isometry = Affine | AffineCompact - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Affine)||OtherMode==int(AffineCompact), Mode!=int(Isometry)), + EIGEN_STATIC_ASSERT(internal::check_implication(OtherMode==int(Affine)||OtherMode==int(AffineCompact), Mode!=int(Isometry)), YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION) enum { ModeIsAffineCompact = Mode == int(AffineCompact), @@ -367,9 +369,11 @@ public: } #ifdef EIGEN_QT_SUPPORT + #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) inline Transform(const QMatrix& other); inline Transform& operator=(const QMatrix& other); inline QMatrix toQMatrix(void) const; + #endif inline Transform(const QTransform& other); inline Transform& operator=(const QTransform& other); inline QTransform toQTransform(void) const; @@ -443,7 +447,7 @@ public: * \li a general transformation matrix of size Dim+1 x Dim+1. */ template friend - EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl::ResultType + EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl::ResultType operator * (const EigenBase &a, const Transform &b) { return internal::transform_left_product_impl::run(a.derived(),b); } @@ -596,7 +600,7 @@ public: template EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase& r) const; - typedef typename internal::conditional::type RotationReturnType; + typedef std::conditional_t RotationReturnType; EIGEN_DEVICE_FUNC RotationReturnType rotation() const; template @@ -732,6 +736,8 @@ typedef Transform Projective3d; **************************/ #ifdef EIGEN_QT_SUPPORT + +#if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) /** Initializes \c *this from a QMatrix assuming the dimension is 2. * * This function is available only if the token EIGEN_QT_SUPPORT is defined. 
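Note: Qt 6 removed the QMatrix class outright, which is why every QMatrix-based member of Transform is fenced behind QT_VERSION < QT_VERSION_CHECK(6, 0, 0) above, while the QTransform overloads stay unguarded. A sketch of how calling code can stay source-compatible across both major versions (hypothetical alias and helper, assuming Qt headers are available):

    #include <QtGlobal>
    #include <QTransform>
    #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
    #include <QMatrix>
    using Qt2DMatrix = QMatrix;     // Qt 5: QMatrix still exists
    #else
    using Qt2DMatrix = QTransform;  // Qt 6: QMatrix is gone, QTransform remains
    #endif

    // Builds a pure scaling in whichever 2D matrix type this Qt provides;
    // both classes default-construct to the identity and expose scale().
    Qt2DMatrix makeScale(double sx, double sy) {
      Qt2DMatrix m;
      m.scale(sx, sy);
      return m;
    }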
@@ -776,6 +782,7 @@ QMatrix Transform::toQMatrix(void) const m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(0,2), m_matrix.coeff(1,2)); } +#endif /** Initializes \c *this from a QTransform assuming the dimension is 2. * @@ -1098,7 +1105,7 @@ template EIGEN_DEVICE_FUNC void Transform::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const { // Note that JacobiSVD is faster than BDCSVD for small matrices. - JacobiSVD svd(linear(), ComputeFullU | ComputeFullV); + JacobiSVD svd(linear()); Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant() < Scalar(0) ? Scalar(-1) : Scalar(1); // so x has absolute value 1 VectorType sv(svd.singularValues()); @@ -1128,7 +1135,7 @@ template EIGEN_DEVICE_FUNC void Transform::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const { // Note that JacobiSVD is faster than BDCSVD for small matrices. - JacobiSVD svd(linear(), ComputeFullU | ComputeFullV); + JacobiSVD svd(linear()); Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant() < Scalar(0) ? Scalar(-1) : Scalar(1); // so x has absolute value 1 VectorType sv(svd.singularValues()); @@ -1259,17 +1266,17 @@ template struct transform_take_affine_part { typedef typename TransformType::MatrixType MatrixType; typedef typename TransformType::AffinePart AffinePart; typedef typename TransformType::ConstAffinePart ConstAffinePart; - static inline AffinePart run(MatrixType& m) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AffinePart run(MatrixType& m) { return m.template block(0,0); } - static inline ConstAffinePart run(const MatrixType& m) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ConstAffinePart run(const MatrixType& m) { return m.template block(0,0); } }; template struct transform_take_affine_part > { typedef typename Transform::MatrixType MatrixType; - static inline MatrixType& run(MatrixType& m) { return m; } - static inline const MatrixType& run(const MatrixType& m) { return m; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MatrixType& run(MatrixType& m) { return m; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const MatrixType& run(const MatrixType& m) { return m; } }; /***************************************************** @@ -1279,7 +1286,7 @@ struct transform_take_affine_part > template struct transform_construct_from_matrix { - static inline void run(Transform *transform, const Other& other) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Transform *transform, const Other& other) { transform->linear() = other; transform->translation().setZero(); @@ -1290,7 +1297,7 @@ struct transform_construct_from_matrix template struct transform_construct_from_matrix { - static inline void run(Transform *transform, const Other& other) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Transform *transform, const Other& other) { transform->affine() = other; transform->makeAffine(); @@ -1300,14 +1307,14 @@ struct transform_construct_from_matrix template struct transform_construct_from_matrix { - static inline void run(Transform *transform, const Other& other) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Transform *transform, const Other& other) { transform->matrix() = other; } }; template struct transform_construct_from_matrix { - static inline void run(Transform *transform, const Other& other) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Transform *transform, const Other& other) { transform->matrix() = other.template block(0,0); } }; @@ -1397,7 +1404,7 @@ struct 
transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is Dim = TransformType::Dim, HDim = TransformType::HDim, OtherRows = MatrixType::RowsAtCompileTime, - WorkingRows = EIGEN_PLAIN_ENUM_MIN(TransformMatrix::RowsAtCompileTime,HDim) + WorkingRows = plain_enum_min(TransformMatrix::RowsAtCompileTime, HDim) }; typedef typename MatrixType::PlainObject ResultType; diff --git a/libs/eigen/Eigen/src/Geometry/Translation.h b/libs/eigen/Eigen/src/Geometry/Translation.h index 8c22901..dd0adba 100644 --- a/libs/eigen/Eigen/src/Geometry/Translation.h +++ b/libs/eigen/Eigen/src/Geometry/Translation.h @@ -10,6 +10,8 @@ #ifndef EIGEN_TRANSLATION_H #define EIGEN_TRANSLATION_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \geometry_module \ingroup Geometry_Module @@ -18,23 +20,23 @@ namespace Eigen { * * \brief Represents a translation transformation * - * \tparam _Scalar the scalar type, i.e., the type of the coefficients. - * \tparam _Dim the dimension of the space, can be a compile time value or Dynamic + * \tparam Scalar_ the scalar type, i.e., the type of the coefficients. + * \tparam Dim_ the dimension of the space, can be a compile time value or Dynamic * * \note This class is not aimed to be used to store a translation transformation, * but rather to make easier the constructions and updates of Transform objects. * * \sa class Scaling, class Transform */ -template +template class Translation { public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar_,Dim_) /** dimension of the space */ - enum { Dim = _Dim }; + enum { Dim = Dim_ }; /** the scalar type of the coefficients */ - typedef _Scalar Scalar; + typedef Scalar_ Scalar; /** corresponding vector type */ typedef Matrix VectorType; /** corresponding linear transformation matrix type */ @@ -131,7 +133,7 @@ public: /** Applies translation to vector */ template - inline typename internal::enable_if::type + inline std::enable_if_t operator* (const MatrixBase& vec) const { return m_coeffs + vec.derived(); } diff --git a/libs/eigen/Eigen/src/Geometry/Umeyama.h b/libs/eigen/Eigen/src/Geometry/Umeyama.h index 6b75500..8049787 100644 --- a/libs/eigen/Eigen/src/Geometry/Umeyama.h +++ b/libs/eigen/Eigen/src/Geometry/Umeyama.h @@ -16,6 +16,8 @@ // * Eigen/SVD // * Eigen/Array +#include "./InternalHeaderCheck.h" + namespace Eigen { #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -32,10 +34,10 @@ template struct umeyama_transform_matrix_type { enum { - MinRowsAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(MatrixType::RowsAtCompileTime, OtherMatrixType::RowsAtCompileTime), + MinRowsAtCompileTime = internal::min_size_prefer_dynamic(MatrixType::RowsAtCompileTime, OtherMatrixType::RowsAtCompileTime), // When possible we want to choose some small fixed size value since the result - // is likely to fit on the stack. So here, EIGEN_SIZE_MIN_PREFER_DYNAMIC is not what we want. + // is likely to fit on the stack. So here, min_size_prefer_dynamic is not what we want. HomogeneousDimension = int(MinRowsAtCompileTime) == Dynamic ? 
Dynamic : int(MinRowsAtCompileTime)+1 }; @@ -102,7 +104,7 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo EIGEN_STATIC_ASSERT((internal::is_same::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - enum { Dimension = EIGEN_SIZE_MIN_PREFER_DYNAMIC(Derived::RowsAtCompileTime, OtherDerived::RowsAtCompileTime) }; + enum { Dimension = internal::min_size_prefer_dynamic(Derived::RowsAtCompileTime, OtherDerived::RowsAtCompileTime) }; typedef Matrix VectorType; typedef Matrix MatrixType; @@ -122,13 +124,10 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo const RowMajorMatrixType src_demean = src.colwise() - src_mean; const RowMajorMatrixType dst_demean = dst.colwise() - dst_mean; - // Eq. (36)-(37) - const Scalar src_var = src_demean.rowwise().squaredNorm().sum() * one_over_n; - // Eq. (38) const MatrixType sigma = one_over_n * dst_demean * src_demean.transpose(); - JacobiSVD svd(sigma, ComputeFullU | ComputeFullV); + JacobiSVD svd(sigma); // Initialize the resulting transformation with an identity matrix... TransformationMatrixType Rt = TransformationMatrixType::Identity(m+1,m+1); @@ -144,6 +143,9 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo if (with_scaling) { + // Eq. (36)-(37) + const Scalar src_var = src_demean.rowwise().squaredNorm().sum() * one_over_n; + // Eq. (42) const Scalar c = Scalar(1)/src_var * svd.singularValues().dot(S); diff --git a/libs/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h b/libs/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h index 9af6a9a..bd91949 100644 --- a/libs/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +++ b/libs/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h @@ -11,6 +11,8 @@ #ifndef EIGEN_GEOMETRY_SIMD_H #define EIGEN_GEOMETRY_SIMD_H +#include "../InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/Householder/BlockHouseholder.h b/libs/eigen/Eigen/src/Householder/BlockHouseholder.h index 39ce1c2..a5c8095 100644 --- a/libs/eigen/Eigen/src/Householder/BlockHouseholder.h +++ b/libs/eigen/Eigen/src/Householder/BlockHouseholder.h @@ -13,6 +13,8 @@ // This file contains some helper function to deal with block householder reflectors +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -85,7 +87,7 @@ void make_block_householder_triangular_factor(TriangularFactorType& triFactor, c template void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward) { - enum { TFactorSize = MatrixType::ColsAtCompileTime }; + enum { TFactorSize = VectorsType::ColsAtCompileTime }; Index nbVecs = vectors.cols(); Matrix T(nbVecs,nbVecs); diff --git a/libs/eigen/Eigen/src/Householder/Householder.h b/libs/eigen/Eigen/src/Householder/Householder.h index 5bc037f..855b752 100644 --- a/libs/eigen/Eigen/src/Householder/Householder.h +++ b/libs/eigen/Eigen/src/Householder/Householder.h @@ -11,6 +11,8 @@ #ifndef EIGEN_HOUSEHOLDER_H #define EIGEN_HOUSEHOLDER_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -69,7 +71,7 @@ void MatrixBase::makeHouseholder( Scalar& tau, RealScalar& beta) const { - using std::sqrt; + using numext::sqrt; using numext::conj; EIGEN_STATIC_ASSERT_VECTOR_ONLY(EssentialPart) @@ -122,7 +124,7 @@ void MatrixBase::applyHouseholderOnTheLeft( { *this *= Scalar(1)-tau; } - else if(tau!=Scalar(0)) + else if(!numext::is_exactly_zero(tau)) { Map::type> tmp(workspace,cols()); Block 
bottom(derived(), 1, 0, rows()-1, cols()); @@ -160,7 +162,7 @@ void MatrixBase::applyHouseholderOnTheRight( { *this *= Scalar(1)-tau; } - else if(tau!=Scalar(0)) + else if(!numext::is_exactly_zero(tau)) { Map::type> tmp(workspace,rows()); Block right(derived(), 0, 1, rows(), cols()-1); diff --git a/libs/eigen/Eigen/src/Householder/HouseholderSequence.h b/libs/eigen/Eigen/src/Householder/HouseholderSequence.h index 022f6c3..41fef64 100644 --- a/libs/eigen/Eigen/src/Householder/HouseholderSequence.h +++ b/libs/eigen/Eigen/src/Householder/HouseholderSequence.h @@ -11,6 +11,8 @@ #ifndef EIGEN_HOUSEHOLDER_SEQUENCE_H #define EIGEN_HOUSEHOLDER_SEQUENCE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup Householder_Module @@ -131,34 +133,34 @@ template class HouseholderS typedef typename internal::traits::Scalar Scalar; typedef HouseholderSequence< - typename internal::conditional::IsComplex, - typename internal::remove_all::type, - VectorsType>::type, - typename internal::conditional::IsComplex, - typename internal::remove_all::type, - CoeffsType>::type, + std::conditional_t::IsComplex, + internal::remove_all_t, + VectorsType>, + std::conditional_t::IsComplex, + internal::remove_all_t, + CoeffsType>, Side > ConjugateReturnType; typedef HouseholderSequence< VectorsType, - typename internal::conditional::IsComplex, - typename internal::remove_all::type, - CoeffsType>::type, + std::conditional_t::IsComplex, + internal::remove_all_t, + CoeffsType>, Side > AdjointReturnType; typedef HouseholderSequence< - typename internal::conditional::IsComplex, - typename internal::remove_all::type, - VectorsType>::type, + std::conditional_t::IsComplex, + internal::remove_all_t, + VectorsType>, CoeffsType, Side > TransposeReturnType; typedef HouseholderSequence< - typename internal::add_const::type, - typename internal::add_const::type, + std::add_const_t, + std::add_const_t, Side > ConstHouseholderSequence; @@ -255,10 +257,10 @@ template class HouseholderS */ template EIGEN_DEVICE_FUNC - inline typename internal::conditional::type + inline std::conditional_t conjugateIf() const { - typedef typename internal::conditional::type ReturnType; + typedef std::conditional_t ReturnType; return ReturnType(m_vectors.template conjugateIf(), m_coeffs.template conjugateIf()); } @@ -382,21 +384,25 @@ template class HouseholderS Index bs = end-k; Index start = k + m_shift; - typedef Block::type,Dynamic,Dynamic> SubVectorsType; + typedef Block,Dynamic,Dynamic> SubVectorsType; SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start, Side==OnTheRight ? start : k, Side==OnTheRight ? bs : m_vectors.rows()-start, Side==OnTheRight ? m_vectors.cols()-start : bs); - typename internal::conditional, SubVectorsType&>::type sub_vecs(sub_vecs1); + std::conditional_t, SubVectorsType&> sub_vecs(sub_vecs1); - Index dstStart = dst.rows()-rows()+m_shift+k; Index dstRows = rows()-m_shift-k; - Block sub_dst(dst, - dstStart, - inputIsIdentity ? dstStart : 0, - dstRows, - inputIsIdentity ? 
dstRows : dst.cols()); - apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_reverse); + + if (inputIsIdentity) + { + Block sub_dst = dst.bottomRightCorner(dstRows, dstRows); + apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_reverse); + } + else + { + auto sub_dst = dst.bottomRows(dstRows); + apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_reverse); + } } } else @@ -405,9 +411,18 @@ template class HouseholderS for(Index k = 0; k < m_length; ++k) { Index actual_k = m_reverse ? k : m_length-k-1; - Index dstStart = rows()-m_shift-actual_k; - dst.bottomRightCorner(dstStart, inputIsIdentity ? dstStart : dst.cols()) - .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); + Index dstRows = rows()-m_shift-actual_k; + + if (inputIsIdentity) + { + Block sub_dst = dst.bottomRightCorner(dstRows, dstRows); + sub_dst.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); + } + else + { + auto sub_dst = dst.bottomRows(dstRows); + sub_dst.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); + } } } } @@ -428,7 +443,7 @@ template class HouseholderS return res; } - template friend struct internal::hseq_side_dependent_impl; + template friend struct internal::hseq_side_dependent_impl; /** \brief Sets the length of the Householder sequence. * \param [in] length New value for the length. diff --git a/libs/eigen/Eigen/src/Householder/InternalHeaderCheck.h b/libs/eigen/Eigen/src/Householder/InternalHeaderCheck.h new file mode 100644 index 0000000..70de89b --- /dev/null +++ b/libs/eigen/Eigen/src/Householder/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_HOUSEHOLDER_MODULE_H +#error "Please include Eigen/Householder instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/libs/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index a117fc1..d2d55b7 100644 --- a/libs/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/libs/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -10,6 +10,8 @@ #ifndef EIGEN_BASIC_PRECONDITIONERS_H #define EIGEN_BASIC_PRECONDITIONERS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup IterativeLinearSolvers_Module @@ -21,7 +23,7 @@ namespace Eigen { A.diagonal().asDiagonal() . x = b \endcode * - * \tparam _Scalar the type of the scalar. + * \tparam Scalar_ the type of the scalar. * * \implsparsesolverconcept * @@ -32,10 +34,10 @@ namespace Eigen { * * \sa class LeastSquareDiagonalPreconditioner, class ConjugateGradient */ -template +template class DiagonalPreconditioner { - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Matrix Vector; public: typedef typename Vector::StorageIndex StorageIndex; @@ -116,7 +118,7 @@ class DiagonalPreconditioner (A.adjoint() * A).diagonal().asDiagonal() * x = b \endcode * - * \tparam _Scalar the type of the scalar. + * \tparam Scalar_ the type of the scalar. 
* * \implsparsesolverconcept * @@ -124,12 +126,12 @@ class DiagonalPreconditioner * * \sa class LeastSquaresConjugateGradient, class DiagonalPreconditioner */ -template -class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> +template +class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner { - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef typename NumTraits::Real RealScalar; - typedef DiagonalPreconditioner<_Scalar> Base; + typedef DiagonalPreconditioner Base; using Base::m_invdiag; public: diff --git a/libs/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/libs/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 153acef..76195c7 100644 --- a/libs/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/libs/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -11,6 +11,8 @@ #ifndef EIGEN_BICGSTAB_H #define EIGEN_BICGSTAB_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -49,9 +51,9 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, x.setZero(); return true; } - Scalar rho = 1; - Scalar alpha = 1; - Scalar w = 1; + Scalar rho (1); + Scalar alpha (1); + Scalar w (1); VectorType v = VectorType::Zero(n), p = VectorType::Zero(n); VectorType y(n), z(n); @@ -108,17 +110,17 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, } -template< typename _MatrixType, - typename _Preconditioner = DiagonalPreconditioner > +template< typename MatrixType_, + typename Preconditioner_ = DiagonalPreconditioner > class BiCGSTAB; namespace internal { -template< typename _MatrixType, typename _Preconditioner> -struct traits > +template< typename MatrixType_, typename Preconditioner_> +struct traits > { - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; + typedef MatrixType_ MatrixType; + typedef Preconditioner_ Preconditioner; }; } @@ -129,8 +131,8 @@ struct traits > * This class allows to solve for A.x = b sparse linear problems using a bi conjugate gradient * stabilized algorithm. The vectors x and b can be either dense or sparse. * - * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. - * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner + * \tparam MatrixType_ the type of the sparse matrix A, can be a dense or a sparse matrix. + * \tparam Preconditioner_ the type of the preconditioner. 
Default is DiagonalPreconditioner * * \implsparsesolverconcept * @@ -154,8 +156,8 @@ struct traits > * * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ -template< typename _MatrixType, typename _Preconditioner> -class BiCGSTAB : public IterativeSolverBase > +template< typename MatrixType_, typename Preconditioner_> +class BiCGSTAB : public IterativeSolverBase > { typedef IterativeSolverBase Base; using Base::matrix; @@ -164,10 +166,10 @@ class BiCGSTAB : public IterativeSolverBase VectorType; @@ -56,7 +56,7 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, if (residualNorm2 < threshold) { iters = 0; - tol_error = sqrt(residualNorm2 / rhsNorm2); + tol_error = numext::sqrt(residualNorm2 / rhsNorm2); return; } @@ -86,23 +86,23 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, p = z + beta * p; // update search direction i++; } - tol_error = sqrt(residualNorm2 / rhsNorm2); + tol_error = numext::sqrt(residualNorm2 / rhsNorm2); iters = i; } } -template< typename _MatrixType, int _UpLo=Lower, - typename _Preconditioner = DiagonalPreconditioner > +template< typename MatrixType_, int UpLo_=Lower, + typename Preconditioner_ = DiagonalPreconditioner > class ConjugateGradient; namespace internal { -template< typename _MatrixType, int _UpLo, typename _Preconditioner> -struct traits > +template< typename MatrixType_, int UpLo_, typename Preconditioner_> +struct traits > { - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; + typedef MatrixType_ MatrixType; + typedef Preconditioner_ Preconditioner; }; } @@ -113,11 +113,11 @@ struct traits > * This class allows to solve for A.x = b linear problems using an iterative conjugate gradient algorithm. * The matrix A must be selfadjoint. The matrix A and the vectors x and b can be either dense or sparse. * - * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix. - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower, + * \tparam MatrixType_ the type of the matrix A, can be a dense or a sparse matrix. + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower, * \c Upper, or \c Lower|Upper in which the full matrix entries will be considered. * Default is \c Lower, best performance is \c Lower|Upper. - * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner + * \tparam Preconditioner_ the type of the preconditioner. Default is DiagonalPreconditioner * * \implsparsesolverconcept * @@ -127,8 +127,8 @@ struct traits > * * The tolerance corresponds to the relative residual error: |Ax-b|/|b| * - * \b Performance: Even though the default value of \c _UpLo is \c Lower, significantly higher performance is - * achieved when using a complete matrix and \b Lower|Upper as the \a _UpLo template parameter. Moreover, in this + * \b Performance: Even though the default value of \c UpLo_ is \c Lower, significantly higher performance is + * achieved when using a complete matrix and \b Lower|Upper as the \a UpLo_ template parameter. Moreover, in this * case multi-threading can be exploited if the user code is compiled with OpenMP enabled. * See \ref TopicMultiThreading for details. 
* @@ -154,8 +154,8 @@ struct traits > * * \sa class LeastSquaresConjugateGradient, class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ -template< typename _MatrixType, int _UpLo, typename _Preconditioner> -class ConjugateGradient : public IterativeSolverBase > +template< typename MatrixType_, int UpLo_, typename Preconditioner_> +class ConjugateGradient : public IterativeSolverBase > { typedef IterativeSolverBase Base; using Base::matrix; @@ -164,13 +164,13 @@ class ConjugateGradient : public IterativeSolverBase::IsComplex) }; - typedef typename internal::conditional, ActualMatrixType const&>::type RowMajorWrapper; - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); - typedef typename internal::conditional::Type - >::type SelfAdjointWrapper; + typedef std::conditional_t, ActualMatrixType const&> RowMajorWrapper; + EIGEN_STATIC_ASSERT(internal::check_implication(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); + typedef std::conditional_t::Type + > SelfAdjointWrapper; m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; diff --git a/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h index 7803fd8..e697f32 100644 --- a/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +++ b/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h @@ -14,6 +14,8 @@ #include #include +#include "./InternalHeaderCheck.h" + namespace Eigen { /** * \brief Modified Incomplete Cholesky with dual threshold @@ -22,9 +24,9 @@ namespace Eigen { * Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 * * \tparam Scalar the scalar type of the input matrices - * \tparam _UpLo The triangular part that will be used for the computations. It can be Lower + * \tparam UpLo_ The triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. - * \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering, + * \tparam OrderingType_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering, * unless EIGEN_MPL2_ONLY is defined, in which case the default is NaturalOrdering. * * \implsparsesolverconcept @@ -41,15 +43,15 @@ namespace Eigen { * the info() method, then you can either increase the initial shift, or better use another preconditioning technique. 
* */ -template > -class IncompleteCholesky : public SparseSolverBase > +template > +class IncompleteCholesky : public SparseSolverBase > { protected: - typedef SparseSolverBase > Base; + typedef SparseSolverBase > Base; using Base::m_isInitialized; public: typedef typename NumTraits::Real RealScalar; - typedef _OrderingType OrderingType; + typedef OrderingType_ OrderingType; typedef typename OrderingType::PermutationType PermutationType; typedef typename PermutationType::StorageIndex StorageIndex; typedef SparseMatrix FactorType; @@ -57,7 +59,7 @@ class IncompleteCholesky : public SparseSolverBase VectorRx; typedef Matrix VectorIx; typedef std::vector > VectorList; - enum { UpLo = _UpLo }; + enum { UpLo = UpLo_ }; enum { ColsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic @@ -160,13 +162,13 @@ class IncompleteCholesky : public SparseSolverBase -template -void IncompleteCholesky::factorize(const _MatrixType& mat) +template +template +void IncompleteCholesky::factorize(const MatrixType_& mat) { using std::sqrt; eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); @@ -199,12 +201,12 @@ void IncompleteCholesky::factorize(const _MatrixType { // The temporary is needed to make sure that the diagonal entry is properly sorted FactorType tmp(mat.rows(), mat.cols()); - tmp = mat.template selfadjointView<_UpLo>().twistedBy(m_perm); + tmp = mat.template selfadjointView().twistedBy(m_perm); m_L.template selfadjointView() = tmp.template selfadjointView(); } else { - m_L.template selfadjointView() = mat.template selfadjointView<_UpLo>(); + m_L.template selfadjointView() = mat.template selfadjointView(); } Index n = m_L.cols(); @@ -369,8 +371,8 @@ void IncompleteCholesky::factorize(const _MatrixType } while(m_info!=Success); } -template -inline void IncompleteCholesky::updateList(Ref colPtr, Ref rowIdx, Ref vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol) +template +inline void IncompleteCholesky::updateList(Ref colPtr, Ref rowIdx, Ref vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol) { if (jk < colPtr(col+1) ) { diff --git a/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index cdcf709..44f25fc 100644 --- a/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/libs/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -12,6 +12,8 @@ #define EIGEN_INCOMPLETE_LUT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -95,15 +97,15 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut) * alternatively, on GMANE: * http://comments.gmane.org/gmane.comp.lib.eigen/3302 */ -template -class IncompleteLUT : public SparseSolverBase > +template +class IncompleteLUT : public SparseSolverBase > { protected: typedef SparseSolverBase Base; using Base::m_isInitialized; public: - typedef _Scalar Scalar; - typedef _StorageIndex StorageIndex; + typedef Scalar_ Scalar; + typedef StorageIndex_ StorageIndex; typedef typename NumTraits::Real RealScalar; typedef Matrix Vector; typedef Matrix VectorI; @@ -219,8 +221,8 @@ void IncompleteLUT::setFillfactor(int fillfactor) } template -template -void IncompleteLUT::analyzePattern(const _MatrixType& amat) +template +void IncompleteLUT::analyzePattern(const MatrixType_& amat) { // Compute the Fill-reducing permutation // Since ILUT does not perform any numerical pivoting, @@ -240,8 +242,8 @@ void IncompleteLUT::analyzePattern(const _MatrixType& amat) } template -template 
-void IncompleteLUT<Scalar,StorageIndex>::factorize(const _MatrixType& amat)
+template<typename MatrixType_>
+void IncompleteLUT<Scalar,StorageIndex>::factorize(const MatrixType_& amat)
 {
   using std::sqrt;
   using std::swap;
diff --git a/libs/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h b/libs/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h
new file mode 100644
index 0000000..b657e84
--- /dev/null
+++ b/libs/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h
@@ -0,0 +1,3 @@
+#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
+#error "Please include Eigen/IterativeLinearSolvers instead of including headers inside the src directory directly."
+#endif
diff --git a/libs/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/libs/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
index 28a0c51..49829d0 100644
--- a/libs/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
+++ b/libs/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
@@ -10,6 +10,8 @@
 #ifndef EIGEN_ITERATIVE_SOLVER_BASE_H
 #define EIGEN_ITERATIVE_SOLVER_BASE_H

+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {

 namespace internal {
@@ -40,7 +42,7 @@ public:
 template<typename MatrixType>
 struct is_ref_compatible
 {
-  enum { value = is_ref_compatible_impl<typename remove_all<MatrixType>::type>::value };
+  enum { value = is_ref_compatible_impl<remove_all_t<MatrixType>>::value };
 };

 template<typename MatrixType, bool MatrixFree = !internal::is_ref_compatible<MatrixType>::value>
@@ -77,16 +79,16 @@ public:
   template<typename InputType>
   void grab(const EigenBase<InputType> &mat)
   {
-    m_matrix.~Ref<const MatrixType>();
-    ::new (&m_matrix) Ref<const MatrixType>(mat.derived());
+    internal::destroy_at(&m_matrix);
+    internal::construct_at(&m_matrix, mat.derived());
   }

   void grab(const Ref<const MatrixType> &mat)
   {
     if(&(mat.derived()) != &m_matrix)
     {
-      m_matrix.~Ref<const MatrixType>();
-      ::new (&m_matrix) Ref<const MatrixType>(mat);
+      internal::destroy_at(&m_matrix);
+      internal::construct_at(&m_matrix, mat);
     }
   }
@@ -186,6 +188,9 @@ public:
     compute(matrix());
   }
+
+  IterativeSolverBase(IterativeSolverBase&&) = default;
+
   ~IterativeSolverBase() {}

   /** Initializes the iterative solver for the sparsity pattern of the matrix \a A for further solving \c Ax=b problems.
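Note: the grab() hunks above swap a manual destructor call plus placement new for internal::destroy_at/internal::construct_at. The idiom being expressed: a Ref member has no assignment operator, so re-seating it onto another matrix requires ending its lifetime and constructing a fresh object in the same storage. A stand-alone sketch of that idiom using the std counterparts (hypothetical View/Holder types; std::construct_at needs C++20, whereas Eigen presumably keeps its own internal wrappers for its C++14 baseline):

    #include <memory>  // std::destroy_at (C++17), std::construct_at (C++20)

    struct View {                    // stand-in for Eigen::Ref: not assignable
      const double* data;
      explicit View(const double* d) : data(d) {}
      View& operator=(const View&) = delete;
    };

    struct Holder {
      View v;
      explicit Holder(const double* d) : v(d) {}
      void rebind(const double* d) {
        std::destroy_at(&v);       // end the old view's lifetime
        std::construct_at(&v, d);  // begin a new one in the same storage
      }
    };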
@@ -295,7 +300,7 @@ public: /** \returns the number of iterations performed during the last solve */ Index iterations() const { - eigen_assert(m_isInitialized && "ConjugateGradient is not initialized."); + eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized."); return m_iterations; } @@ -304,7 +309,7 @@ public: */ RealScalar error() const { - eigen_assert(m_isInitialized && "ConjugateGradient is not initialized."); + eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized."); return m_error; } @@ -364,7 +369,7 @@ public: } template - typename internal::enable_if::type + std::enable_if_t _solve_with_guess_impl(const Rhs& b, MatrixBase &aDest) const { eigen_assert(rows()==b.rows()); @@ -389,7 +394,7 @@ public: } template - typename internal::enable_if::type + std::enable_if_t _solve_with_guess_impl(const Rhs& b, MatrixBase &dest) const { derived()._solve_vector_with_guess_impl(b,dest.derived()); diff --git a/libs/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h b/libs/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h index 203fd0e..a76f3f8 100644 --- a/libs/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +++ b/libs/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h @@ -10,6 +10,8 @@ #ifndef EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H #define EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -73,7 +75,7 @@ void least_square_conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest Scalar alpha = absNew / tmp.squaredNorm(); // the amount we travel on dir x += alpha * p; // update solution residual -= alpha * tmp; // update residual - normal_residual = mat.adjoint() * residual; // update residual of the normal equation + normal_residual.noalias() = mat.adjoint() * residual; // update residual of the normal equation residualNorm2 = normal_residual.squaredNorm(); if(residualNorm2 < threshold) @@ -93,17 +95,17 @@ void least_square_conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest } -template< typename _MatrixType, - typename _Preconditioner = LeastSquareDiagonalPreconditioner > +template< typename MatrixType_, + typename Preconditioner_ = LeastSquareDiagonalPreconditioner > class LeastSquaresConjugateGradient; namespace internal { -template< typename _MatrixType, typename _Preconditioner> -struct traits > +template< typename MatrixType_, typename Preconditioner_> +struct traits > { - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; + typedef MatrixType_ MatrixType; + typedef Preconditioner_ Preconditioner; }; } @@ -111,13 +113,13 @@ struct traits > /** \ingroup IterativeLinearSolvers_Module * \brief A conjugate gradient solver for sparse (or dense) least-square problems * - * This class allows to solve for A x = b linear problems using an iterative conjugate gradient algorithm. + * This class solves for the least-squares solution to A x = b using an iterative conjugate gradient algorithm. * The matrix A can be non symmetric and rectangular, but the matrix A' A should be positive-definite to guaranty stability. * Otherwise, the SparseLU or SparseQR classes might be preferable. * The matrix A and the vectors x and b can be either dense or sparse. * - * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix. - * \tparam _Preconditioner the type of the preconditioner. 
Default is LeastSquareDiagonalPreconditioner + * \tparam MatrixType_ the type of the matrix A, can be a dense or a sparse matrix. + * \tparam Preconditioner_ the type of the preconditioner. Default is LeastSquareDiagonalPreconditioner * * \implsparsesolverconcept * @@ -145,8 +147,8 @@ struct traits > * * \sa class ConjugateGradient, SparseLU, SparseQR */ -template< typename _MatrixType, typename _Preconditioner> -class LeastSquaresConjugateGradient : public IterativeSolverBase > +template< typename MatrixType_, typename Preconditioner_> +class LeastSquaresConjugateGradient : public IterativeSolverBase > { typedef IterativeSolverBase Base; using Base::matrix; @@ -155,10 +157,10 @@ class LeastSquaresConjugateGradient : public IterativeSolverBase class SolveWithGuess; @@ -83,7 +85,7 @@ struct evaluator > evaluator(const SolveType& solve) : m_result(solve.rows(), solve.cols()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); m_result = solve.guess(); solve.dec()._solve_with_guess_impl(solve.rhs(), m_result); } diff --git a/libs/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h b/libs/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h new file mode 100644 index 0000000..b17b1f2 --- /dev/null +++ b/libs/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_JACOBI_MODULE_H +#error "Please include Eigen/Jacobi instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/Jacobi/Jacobi.h b/libs/eigen/Eigen/src/Jacobi/Jacobi.h index 76668a5..5d96989 100644 --- a/libs/eigen/Eigen/src/Jacobi/Jacobi.h +++ b/libs/eigen/Eigen/src/Jacobi/Jacobi.h @@ -11,6 +11,8 @@ #ifndef EIGEN_JACOBI_H #define EIGEN_JACOBI_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup Jacobi_Module @@ -161,7 +163,7 @@ template EIGEN_DEVICE_FUNC void JacobiRotation::makeGivens(const Scalar& p, const Scalar& q, Scalar* r) { - makeGivens(p, q, r, typename internal::conditional::IsComplex, internal::true_type, internal::false_type>::type()); + makeGivens(p, q, r, std::conditional_t::IsComplex, internal::true_type, internal::false_type>()); } @@ -232,13 +234,13 @@ void JacobiRotation::makeGivens(const Scalar& p, const Scalar& q, Scalar { using std::sqrt; using std::abs; - if(q==Scalar(0)) + if(numext::is_exactly_zero(q)) { m_c = p::size, - OtherPacketSize = packet_traits::size - }; typedef typename packet_traits::type Packet; typedef typename packet_traits::type OtherPacket; + enum { + RequiredAlignment = plain_enum_max(unpacket_traits::alignment, + unpacket_traits::alignment), + PacketSize = packet_traits::size, + OtherPacketSize = packet_traits::size + }; + /*** dynamic-size vectorized paths ***/ - if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1)) + if(size >= 2 * PacketSize && SizeAtCompileTime == Dynamic && ((incrx == 1 && incry == 1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization enum { Peeling = 2 }; @@ -421,11 +426,11 @@ struct apply_rotation_in_the_plane_selector0) // FIXME should be compared to the required alignment + else if(SizeAtCompileTime != Dynamic && MinAlignment >= RequiredAlignment) { const OtherPacket pc = pset1(c); const OtherPacket ps = pset1(s); - conj_helper::IsComplex,false> pcj; + conj_helper::IsComplex,false> pcj; conj_helper pm; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; @@ -450,11 +455,11 @@ struct apply_rotation_in_the_plane_selector EIGEN_DEVICE_FUNC -void /*EIGEN_DONT_INLINE*/ 
apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) +void inline apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) { typedef typename VectorX::Scalar Scalar; - const bool Vectorizable = (int(VectorX::Flags) & int(VectorY::Flags) & PacketAccessBit) - && (int(packet_traits::size) == int(packet_traits::size)); + constexpr bool Vectorizable = (int(evaluator::Flags) & int(evaluator::Flags) & PacketAccessBit) && + (int(packet_traits::size) == int(packet_traits::size)); eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); @@ -466,13 +471,13 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x OtherScalar c = j.c(); OtherScalar s = j.s(); - if (c==OtherScalar(1) && s==OtherScalar(0)) + if (numext::is_exactly_one(c) && numext::is_exactly_zero(s)) return; apply_rotation_in_the_plane_selector< Scalar,OtherScalar, VectorX::SizeAtCompileTime, - EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), + plain_enum_min(evaluator::Alignment, evaluator::Alignment), Vectorizable>::run(x,incrx,y,incry,size,c,s); } diff --git a/libs/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h b/libs/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h new file mode 100644 index 0000000..eb1d671 --- /dev/null +++ b/libs/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_KLUSUPPORT_MODULE_H +#error "Please include Eigen/KLUSupport instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/KLUSupport/KLUSupport.h b/libs/eigen/Eigen/src/KLUSupport/KLUSupport.h index 215db35..bfe2f66 100644 --- a/libs/eigen/Eigen/src/KLUSupport/KLUSupport.h +++ b/libs/eigen/Eigen/src/KLUSupport/KLUSupport.h @@ -10,6 +10,8 @@ #ifndef EIGEN_KLUSUPPORT_H #define EIGEN_KLUSUPPORT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /* TODO extract L, extract U, compute det, etc... */ @@ -23,7 +25,7 @@ namespace Eigen { * * \warning The input matrix A should be in a \b compressed and \b column-major form. * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix. 
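Note: the Jacobi hunks above replace literal floating-point comparisons such as c==OtherScalar(1) and tau!=Scalar(0) with numext::is_exactly_one/numext::is_exactly_zero. The comparison is still intentionally bit-exact; routing it through a named helper documents that intent and confines any float-equality warning suppression to one place. A sketch of such a helper (hypothetical, not Eigen's actual definition):

    // Deliberate exact comparison against zero, centralised so the intent is
    // explicit and compiler warnings can be silenced at a single site.
    template <typename T>
    constexpr bool is_exactly_zero(const T& x) {
      return x == T(0);
    }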
- * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept * @@ -56,15 +58,15 @@ inline klu_numeric* klu_factor(int Ap[], int Ai[], std::complex Ax[], kl } -template -class KLU : public SparseSolverBase > +template +class KLU : public SparseSolverBase > { protected: - typedef SparseSolverBase > Base; + typedef SparseSolverBase > Base; using Base::m_isInitialized; public: using Base::_solve_impl; - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::StorageIndex StorageIndex; @@ -263,16 +265,16 @@ class KLU : public SparseSolverBase > template void grab(const EigenBase &A) { - mp_matrix.~KLUMatrixRef(); - ::new (&mp_matrix) KLUMatrixRef(A.derived()); + internal::destroy_at(&mp_matrix); + internal::construct_at(&mp_matrix, A.derived()); } void grab(const KLUMatrixRef &A) { if(&(A.derived()) != &mp_matrix) { - mp_matrix.~KLUMatrixRef(); - ::new (&mp_matrix) KLUMatrixRef(A); + internal::destroy_at(&mp_matrix); + internal::construct_at(&mp_matrix, A); } } diff --git a/libs/eigen/Eigen/src/LU/Determinant.h b/libs/eigen/Eigen/src/LU/Determinant.h index 3a41e6f..80e695d 100644 --- a/libs/eigen/Eigen/src/LU/Determinant.h +++ b/libs/eigen/Eigen/src/LU/Determinant.h @@ -10,6 +10,8 @@ #ifndef EIGEN_DETERMINANT_H #define EIGEN_DETERMINANT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -80,8 +82,8 @@ template struct determinant_impl Scalar d3_1 = det3(m, 0,d2_23, 2,d2_03, 3,d2_02); Scalar d3_2 = det3(m, 0,d2_13, 1,d2_03, 3,d2_01); Scalar d3_3 = det3(m, 0,d2_12, 1,d2_02, 2,d2_01); - return internal::pmadd(-m(0,3),d3_0, m(1,3)*d3_1) + - internal::pmadd(-m(2,3),d3_2, m(3,3)*d3_3); + return internal::pmadd(static_cast(-m(0,3)),d3_0, static_cast(m(1,3)*d3_1)) + + internal::pmadd(static_cast(-m(2,3)),d3_2, static_cast(m(3,3)*d3_3)); } protected: static EIGEN_DEVICE_FUNC @@ -93,7 +95,7 @@ protected: static EIGEN_DEVICE_FUNC Scalar det3(const Derived& m, Index i0, const Scalar& d0, Index i1, const Scalar& d1, Index i2, const Scalar& d2) { - return internal::pmadd(m(i0,2), d0, internal::pmadd(-m(i1,2), d1, m(i2,2)*d2)); + return internal::pmadd(m(i0,2), d0, internal::pmadd(static_cast(-m(i1,2)), d1, static_cast(m(i2,2)*d2))); } }; @@ -109,7 +111,7 @@ inline typename internal::traits::Scalar MatrixBase::determina { eigen_assert(rows() == cols()); typedef typename internal::nested_eval::type Nested; - return internal::determinant_impl::type>::run(derived()); + return internal::determinant_impl>::run(derived()); } } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/LU/FullPivLU.h b/libs/eigen/Eigen/src/LU/FullPivLU.h index ba1749f..259b549 100644 --- a/libs/eigen/Eigen/src/LU/FullPivLU.h +++ b/libs/eigen/Eigen/src/LU/FullPivLU.h @@ -10,11 +10,13 @@ #ifndef EIGEN_LU_H #define EIGEN_LU_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template struct traits > - : traits<_MatrixType> +template struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; @@ -30,7 +32,7 @@ template struct traits > * * \brief LU decomposition of a matrix with complete pivoting, and related features * - * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam MatrixType_ the type of the matrix of which we are computing 
the LU decomposition * * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is @@ -57,11 +59,11 @@ template struct traits > * * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() */ -template class FullPivLU - : public SolverBase > +template class FullPivLU + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; friend class SolverBase; @@ -419,10 +421,7 @@ template class FullPivLU protected: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) void computeInPlace(); @@ -487,8 +486,6 @@ FullPivLU::FullPivLU(EigenBase& matrix) template void FullPivLU::computeInPlace() { - check_template_parameters(); - // the permutations are stored as int indices, so just to be sure: eigen_assert(m_lu.rows()<=NumTraits::highest() && m_lu.cols()<=NumTraits::highest()); @@ -522,7 +519,7 @@ void FullPivLU::computeInPlace() row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner, col_of_biggest_in_corner += k; // need to add k to them. - if(biggest_in_corner==Score(0)) + if(numext::is_exactly_zero(biggest_in_corner)) { // before exiting, make sure to initialize the still uninitialized transpositions // in a sane state without destroying what we already have. @@ -613,15 +610,15 @@ MatrixType FullPivLU::reconstructedMatrix() const /********* Implementation of kernel() **************************************************/ namespace internal { -template -struct kernel_retval > - : kernel_retval_base > +template +struct kernel_retval > + : kernel_retval_base > { - EIGEN_MAKE_KERNEL_HELPERS(FullPivLU<_MatrixType>) + EIGEN_MAKE_KERNEL_HELPERS(FullPivLU) - enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED( - MatrixType::MaxColsAtCompileTime, - MatrixType::MaxRowsAtCompileTime) + enum { MaxSmallDimAtCompileTime = min_size_prefer_fixed( + MatrixType::MaxColsAtCompileTime, + MatrixType::MaxRowsAtCompileTime) }; template void evalTo(Dest& dst) const @@ -699,15 +696,15 @@ struct kernel_retval > /***** Implementation of image() *****************************************************/ -template -struct image_retval > - : image_retval_base > +template +struct image_retval > + : image_retval_base > { - EIGEN_MAKE_IMAGE_HELPERS(FullPivLU<_MatrixType>) + EIGEN_MAKE_IMAGE_HELPERS(FullPivLU) - enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED( - MatrixType::MaxColsAtCompileTime, - MatrixType::MaxRowsAtCompileTime) + enum { MaxSmallDimAtCompileTime = min_size_prefer_fixed( + MatrixType::MaxColsAtCompileTime, + MatrixType::MaxRowsAtCompileTime) }; template void evalTo(Dest& dst) const @@ -740,9 +737,9 @@ struct image_retval > } // end namespace internal #ifndef EIGEN_PARSED_BY_DOXYGEN -template +template template -void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +void FullPivLU::_solve_impl(const RhsType &rhs, DstType &dst) const { /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. 
* So we proceed as follows: @@ -787,9 +784,9 @@ void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const dst.row(permutationQ().indices().coeff(i)).setZero(); } -template +template template -void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +void FullPivLU::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}, * and since permutations are real and unitary, we can write this diff --git a/libs/eigen/Eigen/src/LU/InternalHeaderCheck.h b/libs/eigen/Eigen/src/LU/InternalHeaderCheck.h new file mode 100644 index 0000000..f346b17 --- /dev/null +++ b/libs/eigen/Eigen/src/LU/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_LU_MODULE_H +#error "Please include Eigen/LU instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/LU/InverseImpl.h b/libs/eigen/Eigen/src/LU/InverseImpl.h index a40cefa..bcfe703 100644 --- a/libs/eigen/Eigen/src/LU/InverseImpl.h +++ b/libs/eigen/Eigen/src/LU/InverseImpl.h @@ -11,6 +11,8 @@ #ifndef EIGEN_INVERSE_IMPL_H #define EIGEN_INVERSE_IMPL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -309,13 +311,13 @@ struct Assignment, internal::assign_op4) || (extract_data(src.nestedExpression())!=extract_data(dst))) && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); typedef typename internal::nested_eval::type ActualXprType; - typedef typename internal::remove_all::type ActualXprTypeCleanded; + typedef internal::remove_all_t ActualXprTypeCleanded; ActualXprType actual_xpr(src.nestedExpression()); @@ -385,11 +387,11 @@ inline void MatrixBase::computeInverseAndDetWithCheck( eigen_assert(rows() == cols()); // for 2x2, it's worth giving a chance to avoid evaluating. // for larger sizes, evaluating has negligible cost and limits code size. 
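The two-branch typedef below (now spelled with std::conditional_t) feeds computeInverseAndDetWithCheck(), whose calling pattern is easy to miss from the diff alone. A minimal sketch of the public API (illustrative, not from the patch):

    #include <Eigen/LU>

    void invert_safely() {
      Eigen::Matrix3d m = Eigen::Matrix3d::Random();
      Eigen::Matrix3d inv;
      double det;
      bool invertible;
      // One pass computes the inverse and the determinant, and reports
      // near-singular inputs instead of silently returning Inf/NaN.
      m.computeInverseAndDetWithCheck(inv, det, invertible);
      if (!invertible) {
        // fall back, e.g. to a least-squares or pseudo-inverse path
      }
    }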
- typedef typename internal::conditional< + typedef std::conditional_t< RowsAtCompileTime == 2, - typename internal::remove_all::type>::type, + internal::remove_all_t::type>, PlainObject - >::type MatrixType; + > MatrixType; internal::compute_inverse_and_det_with_check::run (derived(), absDeterminantThreshold, inverse, determinant, invertible); } diff --git a/libs/eigen/Eigen/src/LU/PartialPivLU.h b/libs/eigen/Eigen/src/LU/PartialPivLU.h index 34aed72..1377398 100644 --- a/libs/eigen/Eigen/src/LU/PartialPivLU.h +++ b/libs/eigen/Eigen/src/LU/PartialPivLU.h @@ -11,16 +11,18 @@ #ifndef EIGEN_PARTIALLU_H #define EIGEN_PARTIALLU_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template struct traits > - : traits<_MatrixType> +template struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; typedef int StorageIndex; - typedef traits<_MatrixType> BaseTraits; + typedef traits BaseTraits; enum { Flags = BaseTraits::Flags & RowMajorBit, CoeffReadCost = Dynamic @@ -46,7 +48,7 @@ struct enable_if_ref,Derived> { * * \brief LU decomposition of a matrix with partial pivoting, and related features * - * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam MatrixType_ the type of the matrix of which we are computing the LU decomposition * * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P @@ -73,12 +75,12 @@ struct enable_if_ref,Derived> { * * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU */ -template class PartialPivLU - : public SolverBase > +template class PartialPivLU + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; friend class SolverBase; @@ -265,10 +267,7 @@ template class PartialPivLU protected: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) void compute(); @@ -334,12 +333,12 @@ namespace internal { template struct partial_lu_impl { - static const int UnBlockedBound = 16; - static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound; - static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic; + static constexpr int UnBlockedBound = 16; + static constexpr bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound; + static constexpr int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic; // Remaining rows and columns at compile-time: - static const int RRows = SizeAtCompileTime==2 ? 1 : Dynamic; - static const int RCols = SizeAtCompileTime==2 ? 1 : Dynamic; + static constexpr int RRows = SizeAtCompileTime==2 ? 1 : Dynamic; + static constexpr int RCols = SizeAtCompileTime==2 ? 
1 : Dynamic; typedef Matrix MatrixType; typedef Ref MatrixTypeRef; typedef Ref > BlockType; @@ -379,7 +378,7 @@ struct partial_lu_impl row_transpositions[k] = PivIndex(row_of_biggest_in_col); - if(biggest_in_corner != Score(0)) + if(!numext::is_exactly_zero(biggest_in_corner)) { if(k != row_of_biggest_in_col) { @@ -405,7 +404,7 @@ struct partial_lu_impl { Index k = endk; row_transpositions[k] = PivIndex(k); - if (Scoring()(lu(k, k)) == Score(0) && first_zero_pivot == -1) + if (numext::is_exactly_zero(Scoring()(lu(k, k))) && first_zero_pivot == -1) first_zero_pivot = k; } @@ -515,7 +514,7 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t partial_lu_impl < typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex, - EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)> + internal::min_size_prefer_fixed(MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime)> ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions); } @@ -524,8 +523,6 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t template void PartialPivLU::compute() { - check_template_parameters(); - // the row permutation is stored as int indices, so just to be sure: eigen_assert(m_lu.rows()::highest()); diff --git a/libs/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h b/libs/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h index 755168a..b636442 100644 --- a/libs/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +++ b/libs/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h @@ -33,48 +33,61 @@ #ifndef EIGEN_PARTIALLU_LAPACK_H #define EIGEN_PARTIALLU_LAPACK_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -/** \internal Specialization for the data types supported by LAPACKe */ +namespace lapacke_helpers { +// ------------------------------------------------------------------------------------------------------------------- +// Generic lapacke partial lu implementation that converts arguments and dispatches to the function above +// ------------------------------------------------------------------------------------------------------------------- -#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ -template \ -struct partial_lu_impl \ -{ \ - /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ - { \ - EIGEN_UNUSED_VARIABLE(maxBlockSize);\ - lapack_int matrix_order, first_zero_pivot; \ - lapack_int m, n, lda, *ipiv, info; \ - EIGTYPE* a; \ -/* Set up parameters for ?getrf */ \ - matrix_order = StorageOrder==RowMajor ? 
LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = convert_index(luStride); \ - a = lu_data; \ - ipiv = row_transpositions; \ - m = convert_index(rows); \ - n = convert_index(cols); \ - nb_transpositions = 0; \ -\ - info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \ -\ - for(int i=0;i= 0); \ -/* something should be done with nb_transpositions */ \ -\ - first_zero_pivot = info; \ - return first_zero_pivot; \ - } \ +template +struct lapacke_partial_lu { + /** \internal performs the LU decomposition in-place of the matrix represented */ + static lapack_int blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, lapack_int* row_transpositions, + lapack_int& nb_transpositions, lapack_int maxBlockSize=256) + { + EIGEN_UNUSED_VARIABLE(maxBlockSize); + // Set up parameters for getrf + lapack_int matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; + lapack_int lda = to_lapack(luStride); + Scalar* a = lu_data; + lapack_int* ipiv = row_transpositions; + lapack_int m = to_lapack(rows); + lapack_int n = to_lapack(cols); + nb_transpositions = 0; + + lapack_int info = getrf(matrix_order, m, n, to_lapack(a), lda, ipiv ); + eigen_assert(info >= 0); + + for(int i=0; i \ +struct partial_lu_impl : public lapacke_helpers::lapacke_partial_lu {}; + +EIGEN_LAPACKE_PARTIAL_LU(double) +EIGEN_LAPACKE_PARTIAL_LU(float) +EIGEN_LAPACKE_PARTIAL_LU(std::complex) +EIGEN_LAPACKE_PARTIAL_LU(std::complex) + +#undef EIGEN_LAPACKE_PARTIAL_LU } // end namespace internal diff --git a/libs/eigen/Eigen/src/LU/arch/InverseSize4.h b/libs/eigen/Eigen/src/LU/arch/InverseSize4.h index a232ffc..25f4601 100644 --- a/libs/eigen/Eigen/src/LU/arch/InverseSize4.h +++ b/libs/eigen/Eigen/src/LU/arch/InverseSize4.h @@ -35,6 +35,15 @@ #ifndef EIGEN_INVERSE_SIZE_4_H #define EIGEN_INVERSE_SIZE_4_H +#include "../InternalHeaderCheck.h" + +#if EIGEN_COMP_GNUC_STRICT +// These routines requires bit manipulation of the sign, which is not compatible +// with fastmath. 
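The pragma that follows exists because these kernels flip signs by XORing IEEE-754 sign bits (the pxor calls against the sign-mask constants further down), and under -ffast-math GCC may assume signed zeros do not matter and constant-fold such bit patterns away. A scalar sketch of the same trick (illustrative only, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // Negate x by toggling its sign bit, the scalar analogue of the
    // pxor(rd, sign_mask) idiom used in the vectorized inverse below.
    float flip_sign(float x) {
      std::uint32_t bits;
      std::memcpy(&bits, &x, sizeof bits);   // bit-exact view of the float
      bits ^= 0x80000000u;                   // toggle the IEEE-754 sign bit
      std::memcpy(&x, &bits, sizeof bits);
      return x;
    }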
+#pragma GCC push_options +#pragma GCC optimize ("no-fast-math") +#endif + namespace Eigen { namespace internal @@ -48,7 +57,7 @@ struct compute_inverse_size4::Alignment, StorageOrdersMatch = (MatrixType::Flags & RowMajorBit) == (ResultType::Flags & RowMajorBit) }; - typedef typename conditional<(MatrixType::Flags & LinearAccessBit), MatrixType const &, typename MatrixType::PlainObject>::type ActualMatrixType; + typedef std::conditional_t<(MatrixType::Flags & LinearAccessBit), MatrixType const &, typename MatrixType::PlainObject> ActualMatrixType; static void run(const MatrixType &mat, ResultType &result) { @@ -56,10 +65,10 @@ struct compute_inverse_size4(data); - Packet4f _L2 = ploadt(data + stride*4); - Packet4f _L3 = ploadt(data + stride*8); - Packet4f _L4 = ploadt(data + stride*12); + Packet4f L1 = ploadt(data); + Packet4f L2 = ploadt(data + stride*4); + Packet4f L3 = ploadt(data + stride*8); + Packet4f L4 = ploadt(data + stride*12); // Four 2x2 sub-matrices of the input matrix // input = [[A, B], @@ -68,17 +77,17 @@ struct compute_inverse_size4(1.0f), det); + Packet4f rd = preciprocal(det); // Four sub-matrices of the inverse Packet4f iA, iB, iC, iD; @@ -143,8 +152,8 @@ struct compute_inverse_size4(0x80000000u), numext::bit_cast(0x80000000u), 0.0f}; - const Packet4f p4f_sign_PNNP = ploadu(sign_mask); + EIGEN_ALIGN_MAX const float sign_mask[4] = {0.0f, -0.0f, -0.0f, 0.0f}; + const Packet4f p4f_sign_PNNP = pload(sign_mask); rd = pxor(rd, p4f_sign_PNNP); iA = pmul(iA, rd); iB = pmul(iB, rd); @@ -173,9 +182,9 @@ struct compute_inverse_size4::Alignment, StorageOrdersMatch = (MatrixType::Flags & RowMajorBit) == (ResultType::Flags & RowMajorBit) }; - typedef typename conditional<(MatrixType::Flags & LinearAccessBit), - MatrixType const &, - typename MatrixType::PlainObject>::type + typedef std::conditional_t<(MatrixType::Flags & LinearAccessBit), + MatrixType const &, + typename MatrixType::PlainObject> ActualMatrixType; static void run(const MatrixType &mat, ResultType &result) @@ -326,10 +335,10 @@ struct compute_inverse_size4(0x8000000000000000ull)}; - const double sign_mask2[2] = {numext::bit_cast(0x8000000000000000ull), 0.0}; - const Packet2d sign_PN = ploadu(sign_mask1); - const Packet2d sign_NP = ploadu(sign_mask2); + EIGEN_ALIGN_MAX const double sign_mask1[2] = {0.0, -0.0}; + EIGEN_ALIGN_MAX const double sign_mask2[2] = {-0.0, 0.0}; + const Packet2d sign_PN = pload(sign_mask1); + const Packet2d sign_NP = pload(sign_mask2); d1 = pxor(rd, sign_PN); d2 = pxor(rd, sign_NP); @@ -348,4 +357,9 @@ struct compute_inverse_size4 class PastixLU; -template class PastixLLT; -template class PastixLDLT; +template class PastixLU; +template class PastixLLT; +template class PastixLDLT; namespace internal { template struct pastix_traits; - template - struct pastix_traits< PastixLU<_MatrixType> > + template + struct pastix_traits< PastixLU > { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::StorageIndex StorageIndex; + typedef MatrixType_ MatrixType; + typedef typename MatrixType_::Scalar Scalar; + typedef typename MatrixType_::RealScalar RealScalar; + typedef typename MatrixType_::StorageIndex StorageIndex; }; - template - struct pastix_traits< PastixLLT<_MatrixType,Options> > + template + struct pastix_traits< PastixLLT > { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename 
_MatrixType::StorageIndex StorageIndex; + typedef MatrixType_ MatrixType; + typedef typename MatrixType_::Scalar Scalar; + typedef typename MatrixType_::RealScalar RealScalar; + typedef typename MatrixType_::StorageIndex StorageIndex; }; - template - struct pastix_traits< PastixLDLT<_MatrixType,Options> > + template + struct pastix_traits< PastixLDLT > { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::StorageIndex StorageIndex; + typedef MatrixType_ MatrixType; + typedef typename MatrixType_::Scalar Scalar; + typedef typename MatrixType_::RealScalar RealScalar; + typedef typename MatrixType_::StorageIndex StorageIndex; }; inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) @@ -134,8 +136,8 @@ class PastixBase : public SparseSolverBase public: using Base::_solve_impl; - typedef typename internal::pastix_traits::MatrixType _MatrixType; - typedef _MatrixType MatrixType; + typedef typename internal::pastix_traits::MatrixType MatrixType_; + typedef MatrixType_ MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::StorageIndex StorageIndex; @@ -397,7 +399,7 @@ bool PastixBase::_solve_impl(const MatrixBase &b, MatrixBase &x * This interface can symmetrize the input matrix otherwise. * The vectors or matrices X and B can be either dense or sparse. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam IsStrSym Indicates if the input matrix has a symmetric pattern, default is false * NOTE : Note that if the analysis and factorization phase are called separately, * the input matrix will be symmetrized at each call, hence it is advised to @@ -408,11 +410,11 @@ bool PastixBase::_solve_impl(const MatrixBase &b, MatrixBase &x * \sa \ref TutorialSparseSolverConcept, class SparseLU * */ -template -class PastixLU : public PastixBase< PastixLU<_MatrixType> > +template +class PastixLU : public PastixBase< PastixLU > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef PastixBase > Base; typedef typename Base::ColSpMatrix ColSpMatrix; typedef typename MatrixType::StorageIndex StorageIndex; @@ -520,16 +522,16 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> > * * \sa \ref TutorialSparseSolverConcept, class SimplicialLLT */ -template -class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > +template +class PastixLLT : public PastixBase< PastixLLT > { public: - typedef _MatrixType MatrixType; - typedef PastixBase > Base; + typedef MatrixType_ MatrixType; + typedef PastixBase > Base; typedef typename Base::ColSpMatrix ColSpMatrix; public: - enum { UpLo = _UpLo }; + enum { UpLo = UpLo_ }; PastixLLT() : Base() { init(); @@ -604,16 +606,16 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > * * \sa \ref TutorialSparseSolverConcept, class SimplicialLDLT */ -template -class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> > +template +class PastixLDLT : public PastixBase< PastixLDLT > { public: - typedef _MatrixType MatrixType; - typedef PastixBase > Base; + typedef MatrixType_ MatrixType; + typedef PastixBase > Base; typedef typename Base::ColSpMatrix ColSpMatrix; public: - 
enum { UpLo = _UpLo }; + enum { UpLo = UpLo_ }; PastixLDLT():Base() { init(); diff --git a/libs/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h b/libs/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h new file mode 100644 index 0000000..8ef33f0 --- /dev/null +++ b/libs/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_PARDISOSUPPORT_MODULE_H +#error "Please include Eigen/PardisoSupport instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/PardisoSupport/PardisoSupport.h b/libs/eigen/Eigen/src/PardisoSupport/PardisoSupport.h index f89b79b..e9815e6 100644 --- a/libs/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/libs/eigen/Eigen/src/PardisoSupport/PardisoSupport.h @@ -32,11 +32,13 @@ #ifndef EIGEN_PARDISOSUPPORT_H #define EIGEN_PARDISOSUPPORT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { -template class PardisoLU; -template class PardisoLLT; -template class PardisoLDLT; +template class PardisoLU; +template class PardisoLLT; +template class PardisoLDLT; namespace internal { @@ -66,31 +68,31 @@ namespace internal template struct pardiso_traits; - template - struct pardiso_traits< PardisoLU<_MatrixType> > + template + struct pardiso_traits< PardisoLU > { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::StorageIndex StorageIndex; + typedef MatrixType_ MatrixType; + typedef typename MatrixType_::Scalar Scalar; + typedef typename MatrixType_::RealScalar RealScalar; + typedef typename MatrixType_::StorageIndex StorageIndex; }; - template - struct pardiso_traits< PardisoLLT<_MatrixType, Options> > + template + struct pardiso_traits< PardisoLLT > { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::StorageIndex StorageIndex; + typedef MatrixType_ MatrixType; + typedef typename MatrixType_::Scalar Scalar; + typedef typename MatrixType_::RealScalar RealScalar; + typedef typename MatrixType_::StorageIndex StorageIndex; }; - template - struct pardiso_traits< PardisoLDLT<_MatrixType, Options> > + template + struct pardiso_traits< PardisoLDLT > { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::StorageIndex StorageIndex; + typedef MatrixType_ MatrixType; + typedef typename MatrixType_::Scalar Scalar; + typedef typename MatrixType_::RealScalar RealScalar; + typedef typename MatrixType_::StorageIndex StorageIndex; }; } // end namespace internal @@ -375,7 +377,7 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase * By default, it runs in in-core mode. 
To enable PARDISO's out-of-core feature, set: * \code solver.pardisoParameterArray()[59] = 1; \endcode * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept * @@ -437,21 +439,21 @@ class PardisoLU : public PardisoImpl< PardisoLU > * * \sa \ref TutorialSparseSolverConcept, class SimplicialLLT */ -template -class PardisoLLT : public PardisoImpl< PardisoLLT > +template +class PardisoLLT : public PardisoImpl< PardisoLLT > { protected: - typedef PardisoImpl< PardisoLLT > Base; + typedef PardisoImpl< PardisoLLT > Base; using Base::pardisoInit; using Base::m_matrix; - friend class PardisoImpl< PardisoLLT >; + friend class PardisoImpl< PardisoLLT >; public: typedef typename Base::Scalar Scalar; typedef typename Base::RealScalar RealScalar; typedef typename Base::StorageIndex StorageIndex; - enum { UpLo = _UpLo }; + enum { UpLo = UpLo_ }; using Base::compute; PardisoLLT() diff --git a/libs/eigen/Eigen/src/QR/ColPivHouseholderQR.h b/libs/eigen/Eigen/src/QR/ColPivHouseholderQR.h index 9b677e9..c906997 100644 --- a/libs/eigen/Eigen/src/QR/ColPivHouseholderQR.h +++ b/libs/eigen/Eigen/src/QR/ColPivHouseholderQR.h @@ -11,11 +11,13 @@ #ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_H #define EIGEN_COLPIVOTINGHOUSEHOLDERQR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template struct traits > - : traits<_MatrixType> +template struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; @@ -31,7 +33,7 @@ template struct traits > * * \brief Householder rank-revealing QR decomposition of a matrix with column-pivoting * - * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition + * \tparam MatrixType_ the type of the matrix of which we are computing the QR decomposition * * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R * such that @@ -48,12 +50,12 @@ template struct traits > * * \sa MatrixBase::colPivHouseholderQr() */ -template class ColPivHouseholderQR - : public SolverBase > +template class ColPivHouseholderQR + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; friend class SolverBase; @@ -67,7 +69,7 @@ template class ColPivHouseholderQR typedef typename internal::plain_row_type::type IntRowVectorType; typedef typename internal::plain_row_type::type RowVectorType; typedef typename internal::plain_row_type::type RealRowVectorType; - typedef HouseholderSequence::type> HouseholderSequenceType; + typedef HouseholderSequence> HouseholderSequenceType; typedef typename MatrixType::PlainObject PlainObject; private: @@ -217,6 +219,21 @@ template class ColPivHouseholderQR return m_colsPermutation; } + /** \returns the determinant of the matrix of which + * *this is the QR decomposition. It has only linear complexity + * (that is, O(n) where n is the dimension of the square matrix) + * as the QR decomposition has already been computed. + * + * \note This is only for square matrices. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * One way to work around that is to use logAbsDeterminant() instead. 
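The new determinant() accessor assembles det(A) from pieces the factorization already holds: the product of R's diagonal, det(Q) recovered from the Householder coefficients, and the sign of the column permutation (m_det_p below). A quick consistency sketch (illustrative, not from the patch):

    #include <Eigen/QR>
    #include <iostream>

    void check_det() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 6);
      Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);
      // det(A) = prod(diag(R)) * det(Q) * sign(P); should match the
      // direct computation up to rounding.
      std::cout << qr.determinant() << " vs " << A.determinant() << "\n";
    }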
+ * + * \sa absDeterminant(), logAbsDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::Scalar determinant() const; + /** \returns the absolute value of the determinant of the matrix of which * *this is the QR decomposition. It has only linear complexity * (that is, O(n) where n is the dimension of the square matrix) @@ -228,7 +245,7 @@ template class ColPivHouseholderQR * of large enough dimension, there is a risk of overflow/underflow. * One way to work around that is to use logAbsDeterminant() instead. * - * \sa logAbsDeterminant(), MatrixBase::determinant() + * \sa determinant(), logAbsDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar absDeterminant() const; @@ -242,7 +259,7 @@ template class ColPivHouseholderQR * \note This method is useful to work around the risk of overflow/underflow that's inherent * to determinant computation. * - * \sa absDeterminant(), MatrixBase::determinant() + * \sa determinant(), absDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar logAbsDeterminant() const; @@ -426,10 +443,7 @@ template class ColPivHouseholderQR friend class CompleteOrthogonalDecomposition; - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) void computeInPlace(); @@ -443,9 +457,19 @@ template class ColPivHouseholderQR bool m_isInitialized, m_usePrescribedThreshold; RealScalar m_prescribedThreshold, m_maxpivot; Index m_nonzero_pivots; - Index m_det_pq; + Index m_det_p; }; +template +typename MatrixType::Scalar ColPivHouseholderQR::determinant() const +{ + eigen_assert(m_isInitialized && "HouseholderQR is not initialized."); + eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); + Scalar detQ; + internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); + return m_qr.diagonal().prod() * detQ * Scalar(m_det_p); +} + template typename MatrixType::RealScalar ColPivHouseholderQR::absDeterminant() const { @@ -481,8 +505,6 @@ ColPivHouseholderQR& ColPivHouseholderQR::compute(const template void ColPivHouseholderQR::computeInPlace() { - check_template_parameters(); - // the column permutation is stored as int indices, so just to be sure: eigen_assert(m_qr.cols()<=NumTraits::highest()); @@ -555,7 +577,7 @@ void ColPivHouseholderQR::computeInPlace() // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // and used in LAPACK routines xGEQPF and xGEQP3. // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html - if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) { + if (!numext::is_exactly_zero(m_colNormsUpdated.coeffRef(j))) { RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); temp = temp < RealScalar(0) ? RealScalar(0) : temp; @@ -577,14 +599,14 @@ void ColPivHouseholderQR::computeInPlace() for(PermIndexType k = 0; k < size/*m_nonzero_pivots*/; ++k) m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k))); - m_det_pq = (number_of_transpositions%2) ? -1 : 1; + m_det_p = (number_of_transpositions%2) ? 
-1 : 1; m_isInitialized = true; } #ifndef EIGEN_PARSED_BY_DOXYGEN -template +template template -void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +void ColPivHouseholderQR::_solve_impl(const RhsType &rhs, DstType &dst) const { const Index nonzero_pivots = nonzeroPivots(); @@ -606,9 +628,9 @@ void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType & for(Index i = nonzero_pivots; i < cols(); ++i) dst.row(m_colsPermutation.indices().coeff(i)).setZero(); } -template +template template -void ColPivHouseholderQR<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +void ColPivHouseholderQR::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { const Index nonzero_pivots = nonzeroPivots(); diff --git a/libs/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h b/libs/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h index 4e9651f..7652d31 100644 --- a/libs/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +++ b/libs/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h @@ -34,6 +34,8 @@ #ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H #define EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ diff --git a/libs/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/libs/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 486d337..02583a2 100644 --- a/libs/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/libs/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -10,12 +10,14 @@ #ifndef EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H #define EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template -struct traits > - : traits<_MatrixType> { +template +struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; typedef int StorageIndex; @@ -30,7 +32,7 @@ struct traits > * * \brief Complete orthogonal decomposition (COD) of a matrix. * - * \param MatrixType the type of the matrix of which we are computing the COD. + * \tparam MatrixType_ the type of the matrix of which we are computing the COD. 
* * This class performs a rank-revealing complete orthogonal decomposition of a * matrix \b A into matrices \b P, \b Q, \b T, and \b Z such that @@ -47,11 +49,11 @@ struct traits > * * \sa MatrixBase::completeOrthogonalDecomposition() */ -template class CompleteOrthogonalDecomposition - : public SolverBase > +template class CompleteOrthogonalDecomposition + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; template @@ -71,8 +73,8 @@ template class CompleteOrthogonalDecomposition typedef typename internal::plain_row_type::type RealRowVectorType; typedef HouseholderSequence< - MatrixType, typename internal::remove_all< - typename HCoeffsType::ConjugateReturnType>::type> + MatrixType, internal::remove_all_t< + typename HCoeffsType::ConjugateReturnType>> HouseholderSequenceType; typedef typename MatrixType::PlainObject PlainObject; @@ -177,7 +179,7 @@ template class CompleteOrthogonalDecomposition * \code matrixT().template triangularView() \endcode * For rank-deficient matrices, use * \code - * matrixR().topLeftCorner(rank(), rank()).template triangularView() + * matrixT().topLeftCorner(rank(), rank()).template triangularView() * \endcode */ const MatrixType& matrixT() const { return m_cpqr.matrixQR(); } @@ -195,6 +197,21 @@ template class CompleteOrthogonalDecomposition return m_cpqr.colsPermutation(); } + /** \returns the determinant of the matrix of which + * *this is the complete orthogonal decomposition. It has only linear + * complexity (that is, O(n) where n is the dimension of the square matrix) + * as the complete orthogonal decomposition has already been computed. + * + * \note This is only for square matrices. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * One way to work around that is to use logAbsDeterminant() instead. + * + * \sa absDeterminant(), logAbsDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::Scalar determinant() const; + /** \returns the absolute value of the determinant of the matrix of which * *this is the complete orthogonal decomposition. It has only linear * complexity (that is, O(n) where n is the dimension of the square matrix) @@ -206,7 +223,7 @@ template class CompleteOrthogonalDecomposition * of large enough dimension, there is a risk of overflow/underflow. * One way to work around that is to use logAbsDeterminant() instead. * - * \sa logAbsDeterminant(), MatrixBase::determinant() + * \sa determinant(), logAbsDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar absDeterminant() const; @@ -221,7 +238,7 @@ template class CompleteOrthogonalDecomposition * \note This method is useful to work around the risk of overflow/underflow * that's inherent to determinant computation. 
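For context on where the COD earns its keep: unlike plain QR it is rank-revealing, so it yields minimum-norm least-squares solutions and a Moore-Penrose pseudo-inverse. A usage sketch (illustrative; public API only):

    #include <Eigen/QR>

    void rank_deficient_lsq() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(8, 5);
      A.col(4) = A.col(0) + A.col(1);            // force a rank deficiency
      Eigen::VectorXd b = Eigen::VectorXd::Random(8);
      Eigen::CompleteOrthogonalDecomposition<Eigen::MatrixXd> cod(A);
      Eigen::VectorXd x = cod.solve(b);          // minimum-norm least-squares solution
      Eigen::MatrixXd pinv = cod.pseudoInverse();
    }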
* - * \sa absDeterminant(), MatrixBase::determinant() + * \sa determinant(), absDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar logAbsDeterminant() const; @@ -377,9 +394,7 @@ template class CompleteOrthogonalDecomposition #endif protected: - static void check_template_parameters() { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) template void _check_solve_assertion(const Rhs& b) const { @@ -407,6 +422,12 @@ template class CompleteOrthogonalDecomposition RowVectorType m_temp; }; +template +typename MatrixType::Scalar +CompleteOrthogonalDecomposition::determinant() const { + return m_cpqr.determinant(); +} + template typename MatrixType::RealScalar CompleteOrthogonalDecomposition::absDeterminant() const { @@ -429,8 +450,6 @@ CompleteOrthogonalDecomposition::logAbsDeterminant() const { template void CompleteOrthogonalDecomposition::computeInPlace() { - check_template_parameters(); - // the column permutation is stored as int indices, so just to be sure: eigen_assert(m_cpqr.cols() <= NumTraits::highest()); @@ -529,9 +548,9 @@ void CompleteOrthogonalDecomposition::applyZAdjointOnTheLeftInPlace( } #ifndef EIGEN_PARSED_BY_DOXYGEN -template +template template -void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( +void CompleteOrthogonalDecomposition::_solve_impl( const RhsType& rhs, DstType& dst) const { const Index rank = this->rank(); if (rank == 0) { @@ -561,9 +580,9 @@ void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( dst = colsPermutation() * dst; } -template +template template -void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +void CompleteOrthogonalDecomposition::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { const Index rank = this->rank(); diff --git a/libs/eigen/Eigen/src/QR/FullPivHouseholderQR.h b/libs/eigen/Eigen/src/QR/FullPivHouseholderQR.h index d0664a1..ec7e19b 100644 --- a/libs/eigen/Eigen/src/QR/FullPivHouseholderQR.h +++ b/libs/eigen/Eigen/src/QR/FullPivHouseholderQR.h @@ -11,12 +11,14 @@ #ifndef EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H #define EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template struct traits > - : traits<_MatrixType> +template struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; @@ -40,7 +42,7 @@ struct traits > * * \brief Householder rank-revealing QR decomposition of a matrix with full pivoting * - * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition + * \tparam MatrixType_ the type of the matrix of which we are computing the QR decomposition * * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R * such that @@ -57,12 +59,12 @@ struct traits > * * \sa MatrixBase::fullPivHouseholderQr() */ -template class FullPivHouseholderQR - : public SolverBase > +template class FullPivHouseholderQR + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; friend class SolverBase; @@ -74,8 +76,8 @@ template class FullPivHouseholderQR typedef internal::FullPivHouseholderQRMatrixQReturnType MatrixQReturnType; typedef typename internal::plain_diag_type::type HCoeffsType; typedef Matrix IntDiagSizeVectorType; + internal::min_size_prefer_dynamic(ColsAtCompileTime,RowsAtCompileTime), RowMajor, 1, + 
internal::min_size_prefer_fixed(MaxColsAtCompileTime, MaxRowsAtCompileTime)> IntDiagSizeVectorType; typedef PermutationMatrix PermutationType; typedef typename internal::plain_row_type::type RowVectorType; typedef typename internal::plain_col_type::type ColVectorType; @@ -208,6 +210,21 @@ template class FullPivHouseholderQR return m_rows_transpositions; } + /** \returns the determinant of the matrix of which + * *this is the QR decomposition. It has only linear complexity + * (that is, O(n) where n is the dimension of the square matrix) + * as the QR decomposition has already been computed. + * + * \note This is only for square matrices. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * One way to work around that is to use logAbsDeterminant() instead. + * + * \sa absDeterminant(), logAbsDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::Scalar determinant() const; + /** \returns the absolute value of the determinant of the matrix of which * *this is the QR decomposition. It has only linear complexity * (that is, O(n) where n is the dimension of the square matrix) @@ -219,7 +236,7 @@ template class FullPivHouseholderQR * of large enough dimension, there is a risk of overflow/underflow. * One way to work around that is to use logAbsDeterminant() instead. * - * \sa logAbsDeterminant(), MatrixBase::determinant() + * \sa determinant(), logAbsDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar absDeterminant() const; @@ -233,7 +250,7 @@ template class FullPivHouseholderQR * \note This method is useful to work around the risk of overflow/underflow that's inherent * to determinant computation. * - * \sa absDeterminant(), MatrixBase::determinant() + * \sa determinant(), absDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar logAbsDeterminant() const; @@ -403,10 +420,7 @@ template class FullPivHouseholderQR protected: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) void computeInPlace(); @@ -420,9 +434,19 @@ template class FullPivHouseholderQR RealScalar m_prescribedThreshold, m_maxpivot; Index m_nonzero_pivots; RealScalar m_precision; - Index m_det_pq; + Index m_det_p; }; +template +typename MatrixType::Scalar FullPivHouseholderQR::determinant() const +{ + eigen_assert(m_isInitialized && "HouseholderQR is not initialized."); + eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); + Scalar detQ; + internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); + return m_qr.diagonal().prod() * detQ * Scalar(m_det_p); +} + template typename MatrixType::RealScalar FullPivHouseholderQR::absDeterminant() const { @@ -458,8 +482,6 @@ FullPivHouseholderQR& FullPivHouseholderQR::compute(cons template void FullPivHouseholderQR::computeInPlace() { - check_template_parameters(); - using std::abs; Index rows = m_qr.rows(); Index cols = m_qr.cols(); @@ -534,14 +556,14 @@ void FullPivHouseholderQR::computeInPlace() for(Index k = 0; k < size; ++k) m_cols_permutation.applyTranspositionOnTheRight(k, m_cols_transpositions.coeff(k)); - m_det_pq = (number_of_transpositions%2) ? -1 : 1; + m_det_p = (number_of_transpositions%2) ? 
-1 : 1; m_isInitialized = true; } #ifndef EIGEN_PARSED_BY_DOXYGEN -template +template template -void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +void FullPivHouseholderQR::_solve_impl(const RhsType &rhs, DstType &dst) const { const Index l_rank = rank(); @@ -573,9 +595,9 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType for(Index i = l_rank; i < cols(); ++i) dst.row(m_cols_permutation.indices().coeff(i)).setZero(); } -template +template template -void FullPivHouseholderQR<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +void FullPivHouseholderQR::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { const Index l_rank = rank(); diff --git a/libs/eigen/Eigen/src/QR/HouseholderQR.h b/libs/eigen/Eigen/src/QR/HouseholderQR.h index 801739f..abfefd1 100644 --- a/libs/eigen/Eigen/src/QR/HouseholderQR.h +++ b/libs/eigen/Eigen/src/QR/HouseholderQR.h @@ -12,11 +12,13 @@ #ifndef EIGEN_QR_H #define EIGEN_QR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -template struct traits > - : traits<_MatrixType> +template struct traits > + : traits { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; @@ -33,7 +35,7 @@ template struct traits > * * \brief Householder QR decomposition of a matrix * - * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition + * \tparam MatrixType_ the type of the matrix of which we are computing the QR decomposition * * This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R * such that @@ -53,12 +55,12 @@ template struct traits > * * \sa MatrixBase::householderQr() */ -template class HouseholderQR - : public SolverBase > +template class HouseholderQR + : public SolverBase > { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef SolverBase Base; friend class SolverBase; @@ -70,7 +72,7 @@ template class HouseholderQR typedef Matrix MatrixQType; typedef typename internal::plain_diag_type::type HCoeffsType; typedef typename internal::plain_row_type::type RowVectorType; - typedef HouseholderSequence::type> HouseholderSequenceType; + typedef HouseholderSequence> HouseholderSequenceType; /** * \brief Default Constructor. @@ -182,6 +184,21 @@ template class HouseholderQR return *this; } + /** \returns the determinant of the matrix of which + * *this is the QR decomposition. It has only linear complexity + * (that is, O(n) where n is the dimension of the square matrix) + * as the QR decomposition has already been computed. + * + * \note This is only for square matrices. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * One way to work around that is to use logAbsDeterminant() instead. + * + * \sa absDeterminant(), logAbsDeterminant(), MatrixBase::determinant() + */ + typename MatrixType::Scalar determinant() const; + /** \returns the absolute value of the determinant of the matrix of which * *this is the QR decomposition. It has only linear complexity * (that is, O(n) where n is the dimension of the square matrix) @@ -193,7 +210,7 @@ template class HouseholderQR * of large enough dimension, there is a risk of overflow/underflow. * One way to work around that is to use logAbsDeterminant() instead. 
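Since HouseholderQR gains the same determinant() member in this patch, the three determinant flavors line up as follows; for large matrices the log form is the robust choice, exactly as the warning above says. Illustrative sketch:

    #include <Eigen/QR>
    #include <iostream>

    void det_flavors() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(200, 200);
      Eigen::HouseholderQR<Eigen::MatrixXd> qr(A);
      double d   = qr.determinant();           // signed det; may overflow at this size
      double ad  = qr.absDeterminant();        // |det|; same overflow caveat
      double lad = qr.logAbsDeterminant();     // log|det|; safe against overflow
      std::cout << "log|det A| = " << lad << "\n";
      (void)d; (void)ad;
    }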
* - * \sa logAbsDeterminant(), MatrixBase::determinant() + * \sa determinant(), logAbsDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar absDeterminant() const; @@ -207,7 +224,7 @@ template class HouseholderQR * \note This method is useful to work around the risk of overflow/underflow that's inherent * to determinant computation. * - * \sa absDeterminant(), MatrixBase::determinant() + * \sa determinant(), absDeterminant(), MatrixBase::determinant() */ typename MatrixType::RealScalar logAbsDeterminant() const; @@ -230,10 +247,7 @@ template class HouseholderQR protected: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) void computeInPlace(); @@ -243,6 +257,57 @@ template class HouseholderQR bool m_isInitialized; }; +namespace internal { + +/** \internal */ +template +struct householder_determinant +{ + static void run(const HCoeffs& hCoeffs, Scalar& out_det) + { + out_det = Scalar(1); + Index size = hCoeffs.rows(); + for (Index i = 0; i < size; i ++) + { + // For each valid reflection Q_n, + // det(Q_n) = - conj(h_n) / h_n + // where h_n is the Householder coefficient. + if (hCoeffs(i) != Scalar(0)) + out_det *= - numext::conj(hCoeffs(i)) / hCoeffs(i); + } + } +}; + +/** \internal */ +template +struct householder_determinant +{ + static void run(const HCoeffs& hCoeffs, Scalar& out_det) + { + bool negated = false; + Index size = hCoeffs.rows(); + for (Index i = 0; i < size; i ++) + { + // Each valid reflection negates the determinant. + if (hCoeffs(i) != Scalar(0)) + negated ^= true; + } + out_det = negated ? Scalar(-1) : Scalar(1); + } +}; + +} // end namespace internal + +template +typename MatrixType::Scalar HouseholderQR::determinant() const +{ + eigen_assert(m_isInitialized && "HouseholderQR is not initialized."); + eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!"); + Scalar detQ; + internal::householder_determinant::IsComplex>::run(m_hCoeffs, detQ); + return m_qr.diagonal().prod() * detQ; +} + template typename MatrixType::RealScalar HouseholderQR::absDeterminant() const { @@ -297,6 +362,43 @@ void householder_qr_inplace_unblocked(MatrixQR& mat, HCoeffs& hCoeffs, typename } } +// TODO: add a corresponding public API for updating a QR factorization +/** \internal + * Basically a modified copy of @c Eigen::internal::householder_qr_inplace_unblocked that + * performs a rank-1 update of the QR matrix in compact storage. This function assumes, that + * the first @c k-1 columns of the matrix @c mat contain the QR decomposition of \f$A^N\f$ up to + * column k-1. Then the QR decomposition of the k-th column (given by @c newColumn) is computed by + * applying the k-1 Householder projectors on it and finally compute the projector \f$H_k\f$ of + * it. On exit the matrix @c mat and the vector @c hCoeffs contain the QR decomposition of the + * first k columns of \f$A^N\f$. The \a tempData argument must point to at least mat.cols() scalars. 
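The helper documented here (and defined right below) can grow a QR factorization one column at a time. A hypothetical driver follows, hedged accordingly: this is internal API with no public wrapper yet (the TODO above says as much), so names and availability may change:

    #include <Eigen/QR>

    void incremental_qr() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 3);
      Eigen::MatrixXd qr(5, 3);        // compact QR storage, filled column by column
      Eigen::VectorXd hCoeffs(3);      // one Householder coefficient per column
      Eigen::VectorXd temp(3);         // workspace: at least qr.cols() scalars
      for (Eigen::Index k = 0; k < A.cols(); ++k)
        // After each call, qr/hCoeffs hold the QR of A.leftCols(k+1).
        Eigen::internal::householder_qr_inplace_update(qr, hCoeffs, A.col(k), k, temp.data());
    }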
*/ +template +void householder_qr_inplace_update(MatrixQR& mat, HCoeffs& hCoeffs, const VectorQR& newColumn, + typename MatrixQR::Index k, typename MatrixQR::Scalar* tempData) { + typedef typename MatrixQR::Index Index; + typedef typename MatrixQR::RealScalar RealScalar; + Index rows = mat.rows(); + + eigen_assert(k < mat.cols()); + eigen_assert(k < rows); + eigen_assert(hCoeffs.size() == mat.cols()); + eigen_assert(newColumn.size() == rows); + eigen_assert(tempData); + + // Store new column in mat at column k + mat.col(k) = newColumn; + // Apply H = H_1...H_{k-1} on newColumn (skip if k=0) + for (Index i = 0; i < k; ++i) { + Index remainingRows = rows - i; + mat.col(k) + .tail(remainingRows) + .applyHouseholderOnTheLeft(mat.col(i).tail(remainingRows - 1), hCoeffs.coeffRef(i), tempData + i + 1); + } + // Construct Householder projector in-place in column k + RealScalar beta; + mat.col(k).tail(rows - k).makeHouseholderInPlace(hCoeffs.coeffRef(k), beta); + mat.coeffRef(k, k) = beta; +} + /** \internal */ template +template template -void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +void HouseholderQR::_solve_impl(const RhsType &rhs, DstType &dst) const { const Index rank = (std::min)(rows(), cols()); @@ -374,9 +476,9 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c dst.bottomRows(cols()-rank).setZero(); } -template +template template -void HouseholderQR<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +void HouseholderQR::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { const Index rank = (std::min)(rows(), cols()); @@ -403,8 +505,6 @@ void HouseholderQR<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstT template void HouseholderQR::computeInPlace() { - check_template_parameters(); - Index rows = m_qr.rows(); Index cols = m_qr.cols(); Index size = (std::min)(rows,cols); diff --git a/libs/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h b/libs/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h index 1dc7d53..57c2f6a 100644 --- a/libs/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +++ b/libs/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h @@ -34,32 +34,41 @@ #ifndef EIGEN_QR_LAPACKE_H #define EIGEN_QR_LAPACKE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { -/** \internal Specialization for the data types supported by LAPACKe */ +namespace lapacke_helpers { -#define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ -template \ -struct householder_qr_inplace_blocked \ -{ \ - static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, \ - typename MatrixQR::Scalar* = 0) \ - { \ - lapack_int m = (lapack_int) mat.rows(); \ - lapack_int n = (lapack_int) mat.cols(); \ - lapack_int lda = (lapack_int) mat.outerStride(); \ - lapack_int matrix_order = (MatrixQR::IsRowMajor) ? 
LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \ - hCoeffs.adjointInPlace(); \ - } \ +template +struct lapacke_hqr +{ + static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, typename MatrixQR::Scalar* = 0) + { + lapack_int m = to_lapack(mat.rows()); + lapack_int n = to_lapack(mat.cols()); + lapack_int lda = to_lapack(mat.outerStride()); + lapack_int matrix_order = lapack_storage_of(mat); + geqrf(matrix_order, m, n, to_lapack(mat.data()), lda, to_lapack(hCoeffs.data())); + hCoeffs.adjointInPlace(); + } }; -EIGEN_LAPACKE_QR_NOPIV(double, double, d) -EIGEN_LAPACKE_QR_NOPIV(float, float, s) -EIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z) -EIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c) +} + +/** \internal Specialization for the data types supported by LAPACKe */ +#define EIGEN_LAPACKE_HH_QR(EIGTYPE) \ +template \ +struct householder_qr_inplace_blocked : public lapacke_helpers::lapacke_hqr {}; + +EIGEN_LAPACKE_HH_QR(double) +EIGEN_LAPACKE_HH_QR(float) +EIGEN_LAPACKE_HH_QR(std::complex) +EIGEN_LAPACKE_HH_QR(std::complex) + +#undef EIGEN_LAPACKE_HH_QR } // end namespace internal diff --git a/libs/eigen/Eigen/src/QR/InternalHeaderCheck.h b/libs/eigen/Eigen/src/QR/InternalHeaderCheck.h new file mode 100644 index 0000000..bf8df01 --- /dev/null +++ b/libs/eigen/Eigen/src/QR/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_QR_MODULE_H +#error "Please include Eigen/QR instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h b/libs/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h new file mode 100644 index 0000000..8d94ba4 --- /dev/null +++ b/libs/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_SPQRSUPPORT_MODULE_H +#error "Please include Eigen/SPQRSupport instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/libs/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h index 013c7ae..36e8ead 100644 --- a/libs/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +++ b/libs/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h @@ -11,6 +11,8 @@ #ifndef EIGEN_SUITESPARSEQRSUPPORT_H #define EIGEN_SUITESPARSEQRSUPPORT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template class SPQR; @@ -50,21 +52,21 @@ namespace Eigen { * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix. * NOTE : The Index type of R is always SuiteSparse_long. 
You can get it with SPQR::Index * - * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> + * \tparam MatrixType_ The type of the sparse matrix A, must be a column-major SparseMatrix<> * * \implsparsesolverconcept * * */ -template -class SPQR : public SparseSolverBase > +template +class SPQR : public SparseSolverBase > { protected: - typedef SparseSolverBase > Base; + typedef SparseSolverBase > Base; using Base::m_isInitialized; public: - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; + typedef typename MatrixType_::Scalar Scalar; + typedef typename MatrixType_::RealScalar RealScalar; typedef SuiteSparse_long StorageIndex ; typedef SparseMatrix MatrixType; typedef Map > PermutationType; @@ -90,7 +92,7 @@ class SPQR : public SparseSolverBase > cholmod_l_start(&m_cc); } - explicit SPQR(const _MatrixType& matrix) + explicit SPQR(const MatrixType_& matrix) : m_analysisIsOk(false), m_factorizationIsOk(false), m_isRUpToDate(false), @@ -122,7 +124,7 @@ class SPQR : public SparseSolverBase > std::free(m_HPinv); } - void compute(const _MatrixType& matrix) + void compute(const MatrixType_& matrix) { if(m_isInitialized) SPQR_free(); @@ -137,7 +139,7 @@ class SPQR : public SparseSolverBase > { RealScalar max2Norm = 0.0; for (int j = 0; j < mat.cols(); j++) max2Norm = numext::maxi(max2Norm, mat.col(j).norm()); - if(max2Norm==RealScalar(0)) + if(numext::is_exactly_zero(max2Norm)) max2Norm = RealScalar(1); pivotThreshold = 20 * (mat.rows() + mat.cols()) * max2Norm * NumTraits::epsilon(); } @@ -258,12 +260,12 @@ class SPQR : public SparseSolverBase > int m_ordering; // Ordering method to use, see SPQR's manual int m_allow_tol; // Allow to use some tolerance during numerical factorization. RealScalar m_tolerance; // treat columns with 2-norm below this tolerance as zero - mutable cholmod_sparse *m_cR; // The sparse R factor in cholmod format + mutable cholmod_sparse *m_cR = nullptr; // The sparse R factor in cholmod format mutable MatrixType m_R; // The sparse matrix R in Eigen format - mutable StorageIndex *m_E; // The permutation applied to columns - mutable cholmod_sparse *m_H; //The householder vectors - mutable StorageIndex *m_HPinv; // The row permutation of H - mutable cholmod_dense *m_HTau; // The Householder coefficients + mutable StorageIndex *m_E = nullptr; // The permutation applied to columns + mutable cholmod_sparse *m_H = nullptr; //The householder vectors + mutable StorageIndex *m_HPinv = nullptr; // The row permutation of H + mutable cholmod_dense *m_HTau = nullptr; // The Householder coefficients mutable Index m_rank; // The rank of the matrix mutable cholmod_common m_cc; // Workspace and parameters bool m_useDefaultThreshold; // Use default threshold diff --git a/libs/eigen/Eigen/src/SVD/BDCSVD.h b/libs/eigen/Eigen/src/SVD/BDCSVD.h index 17f8e44..a69fdca 100644 --- a/libs/eigen/Eigen/src/SVD/BDCSVD.h +++ b/libs/eigen/Eigen/src/SVD/BDCSVD.h @@ -1,9 +1,9 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
-// +// // We used the "A Divide-And-Conquer Algorithm for the Bidiagonal SVD" // research report written by Ming Gu and Stanley C.Eisenstat -// The code variable names correspond to the names they used in their +// The code variable names correspond to the names they used in their // report // // Copyright (C) 2013 Gauthier Brun @@ -27,26 +27,50 @@ #define eigen_internal_assert(X) assert(X); #endif +#include "./InternalHeaderCheck.h" + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE +#include +#endif + namespace Eigen { #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE IOFormat bdcsvdfmt(8, 0, ", ", "\n", " [", "]"); #endif - -template class BDCSVD; + +template +class BDCSVD; namespace internal { -template -struct traits > - : traits<_MatrixType> -{ - typedef _MatrixType MatrixType; -}; +template +struct traits > : svd_traits { + typedef MatrixType_ MatrixType; +}; + +template +struct allocate_small_svd { + static void run(JacobiSVD& smallSvd, Index rows, Index cols, unsigned int computationOptions) { + (void)computationOptions; + smallSvd = JacobiSVD(rows, cols); + } +}; + +EIGEN_DIAGNOSTICS(push) +EIGEN_DISABLE_DEPRECATED_WARNING + +template +struct allocate_small_svd { + static void run(JacobiSVD& smallSvd, Index rows, Index cols, unsigned int computationOptions) { + smallSvd = JacobiSVD(rows, cols, computationOptions); + } +}; + +EIGEN_DIAGNOSTICS(pop) } // end namespace internal - - + /** \ingroup SVD_Module * * @@ -54,7 +78,14 @@ struct traits > * * \brief class Bidiagonal Divide and Conquer SVD * - * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition + * \tparam MatrixType_ the type of the matrix of which we are computing the SVD decomposition + * + * \tparam Options_ this optional parameter allows one to specify options for computing unitaries \a U and \a V. + * Possible values are #ComputeThinU, #ComputeThinV, #ComputeFullU, #ComputeFullV, and + * #DisableQRDecomposition. It is not possible to request both the thin and full version of \a U or + * \a V. By default, unitaries are not computed. BDCSVD uses R-Bidiagonalization to improve + * performance on tall and wide matrices. For backwards compatibility, the option + * #DisableQRDecomposition can be used to disable this optimization. * * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization, * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD. 
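To make the new compile-time interface concrete, a usage sketch (illustrative; assumes an Eigen version that already contains this patch):

    #include <Eigen/SVD>

    void thin_svd() {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(1000, 300);   // tall: R-bidiagonalization applies
      // Unitaries are now requested through the Options template parameter.
      Eigen::BDCSVD<Eigen::MatrixXd, Eigen::ComputeThinU | Eigen::ComputeThinV> svd(m);
      Eigen::VectorXd b = Eigen::VectorXd::Random(1000);
      Eigen::VectorXd x = svd.solve(b);                         // least-squares solve
      // Opting out of the QR preconditioning step, as the doc above describes:
      Eigen::BDCSVD<Eigen::MatrixXd, Eigen::ComputeThinU | Eigen::ComputeThinV |
                                     Eigen::DisableQRDecomposition> svd2(m);
    }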
@@ -69,35 +100,38 @@ struct traits > * * \sa class JacobiSVD */ -template -class BDCSVD : public SVDBase > -{ +template +class BDCSVD : public SVDBase > { typedef SVDBase Base; - + public: using Base::rows; using Base::cols; using Base::computeU; using Base::computeV; - - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; + + typedef MatrixType_ MatrixType; + typedef typename Base::Scalar Scalar; + typedef typename Base::RealScalar RealScalar; typedef typename NumTraits::Literal Literal; + typedef typename Base::Index Index; enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime), - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime), - MatrixOptions = MatrixType::Options + Options = Options_, + QRDecomposition = Options & internal::QRPreconditionerBits, + ComputationOptions = Options & internal::ComputationOptionsBits, + RowsAtCompileTime = Base::RowsAtCompileTime, + ColsAtCompileTime = Base::ColsAtCompileTime, + DiagSizeAtCompileTime = Base::DiagSizeAtCompileTime, + MaxRowsAtCompileTime = Base::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Base::MaxColsAtCompileTime, + MaxDiagSizeAtCompileTime = Base::MaxDiagSizeAtCompileTime, + MatrixOptions = Base::MatrixOptions }; typedef typename Base::MatrixUType MatrixUType; typedef typename Base::MatrixVType MatrixVType; typedef typename Base::SingularValuesType SingularValuesType; - + typedef Matrix MatrixX; typedef Matrix MatrixXr; typedef Matrix VectorType; @@ -114,70 +148,95 @@ public: BDCSVD() : m_algoswap(16), m_isTranspose(false), m_compU(false), m_compV(false), m_numIters(0) {} + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem size and \a Options template parameter. + * \sa BDCSVD() + */ + BDCSVD(Index rows, Index cols) : m_algoswap(16), m_numIters(0) { + allocate(rows, cols, internal::get_computation_options(Options)); + } /** \brief Default Constructor with memory preallocation * * Like the default constructor but with preallocation of the internal data - * according to the specified problem size. + * according to the specified problem size and the \a computationOptions. + * + * One \b cannot request unitaries using both the \a Options template parameter + * and the constructor. If possible, prefer using the \a Options template parameter. + * + * \param computationOptions specification for computing Thin/Full unitaries U/V * \sa BDCSVD() + * + * \deprecated Will be removed in the next major Eigen version. Options should + * be specified in the \a Options template parameter. */ - BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0) - : m_algoswap(16), m_numIters(0) - { + EIGEN_DEPRECATED + BDCSVD(Index rows, Index cols, unsigned int computationOptions) : m_algoswap(16), m_numIters(0) { + internal::check_svd_options_assertions(computationOptions, rows, cols); allocate(rows, cols, computationOptions); } - /** \brief Constructor performing the decomposition of given matrix. 
+ /** \brief Constructor performing the decomposition of given matrix, using the custom options specified + * with the \a Options template parameter. * * \param matrix the matrix to decompose - * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed. - * By default, none is computed. This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU, - * #ComputeFullV, #ComputeThinV. - * - * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not - * available with the (non - default) FullPivHouseholderQR preconditioner. */ - BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0) - : m_algoswap(16), m_numIters(0) - { - compute(matrix, computationOptions); + BDCSVD(const MatrixType& matrix) : m_algoswap(16), m_numIters(0) { + compute_impl(matrix, internal::get_computation_options(Options)); } - ~BDCSVD() - { + /** \brief Constructor performing the decomposition of given matrix using specified options + * for computing unitaries. + * + * One \b cannot request unitaries using both the \a Options template parameter + * and the constructor. If possible, prefer using the \a Options template parameter. + * + * \param matrix the matrix to decompose + * \param computationOptions specification for computing Thin/Full unitaries U/V + * + * \deprecated Will be removed in the next major Eigen version. Options should + * be specified in the \a Options template parameter. + */ + EIGEN_DEPRECATED + BDCSVD(const MatrixType& matrix, unsigned int computationOptions) : m_algoswap(16), m_numIters(0) { + internal::check_svd_options_assertions(computationOptions, matrix.rows(), matrix.cols()); + compute_impl(matrix, computationOptions); } + + ~BDCSVD() {} + + /** \brief Method performing the decomposition of given matrix. Computes Thin/Full unitaries U/V if specified + * using the \a Options template parameter or the class constructor. + * + * \param matrix the matrix to decompose + */ + BDCSVD& compute(const MatrixType& matrix) { return compute_impl(matrix, m_computationOptions); } - /** \brief Method performing the decomposition of given matrix using custom options. + /** \brief Method performing the decomposition of given matrix, as specified by + * the `computationOptions` parameter. * * \param matrix the matrix to decompose - * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed. - * By default, none is computed. This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU, - * #ComputeFullV, #ComputeThinV. - * - * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not - * available with the (non - default) FullPivHouseholderQR preconditioner. + * \param computationOptions specify whether to compute Thin/Full unitaries U/V + * + * \deprecated Will be removed in the next major Eigen version. Options should + * be specified in the \a Options template parameter. */ - BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions); - - /** \brief Method performing the decomposition of given matrix using current options. - * - * \param matrix the matrix to decompose - * - * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int). 
- */ - BDCSVD& compute(const MatrixType& matrix) - { - return compute(matrix, this->m_computationOptions); + EIGEN_DEPRECATED + BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions) { + internal::check_svd_options_assertions(computationOptions, matrix.rows(), matrix.cols()); + return compute_impl(matrix, computationOptions); } - void setSwitchSize(int s) + void setSwitchSize(int s) { - eigen_assert(s>3 && "BDCSVD the size of the algo switch has to be greater than 3"); + eigen_assert(s>=3 && "BDCSVD the size of the algo switch has to be at least 3."); m_algoswap = s; } - + private: - void allocate(Index rows, Index cols, unsigned int computationOptions); + BDCSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions); void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift); void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V); void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus); @@ -190,84 +249,107 @@ private: void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev); void structured_update(Block A, const MatrixXr &B, Index n1); static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift); + template + void computeBaseCase(SVDType& svd, Index n, Index firstCol, Index firstRowW, Index firstColW, Index shift); -protected: + protected: + void allocate(Index rows, Index cols, unsigned int computationOptions); MatrixXr m_naiveU, m_naiveV; MatrixXr m_computed; Index m_nRec; ArrayXr m_workspace; ArrayXi m_workspaceI; int m_algoswap; - bool m_isTranspose, m_compU, m_compV; - - using Base::m_singularValues; - using Base::m_diagSize; - using Base::m_computeFullU; - using Base::m_computeFullV; + bool m_isTranspose, m_compU, m_compV, m_useQrDecomp; + JacobiSVD smallSvd; + HouseholderQR qrDecomp; + internal::UpperBidiagonalization bid; + MatrixX copyWorkspace; + MatrixX reducedTriangle; + + using Base::m_computationOptions; using Base::m_computeThinU; using Base::m_computeThinV; - using Base::m_matrixU; - using Base::m_matrixV; + using Base::m_diagSize; using Base::m_info; using Base::m_isInitialized; + using Base::m_matrixU; + using Base::m_matrixV; using Base::m_nonzeroSingularValues; + using Base::m_singularValues; -public: + public: int m_numIters; -}; //end class BDCSVD - +}; // end class BDCSVD // Method to allocate and initialize matrix and attributes -template -void BDCSVD::allocate(Eigen::Index rows, Eigen::Index cols, unsigned int computationOptions) -{ - m_isTranspose = (cols > rows); - +template +void BDCSVD::allocate(Index rows, Index cols, unsigned int computationOptions) { if (Base::allocate(rows, cols, computationOptions)) return; - + + if (cols < m_algoswap) + internal::allocate_small_svd::run(smallSvd, rows, cols, computationOptions); + m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize ); m_compU = computeV(); m_compV = computeU(); + m_isTranspose = (cols > rows); if (m_isTranspose) std::swap(m_compU, m_compV); - + + // kMinAspectRatio is the crossover point that determines if we perform R-Bidiagonalization + // or bidiagonalize the input matrix directly. + // It is based off of LAPACK's dgesdd routine, which uses 11.0/6.0 + // we use a larger scalar to prevent a regression for relatively square matrices. 
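Two tunables meet at this point: setSwitchSize() above controls when a problem (or a recursion leaf) is handed to the embedded JacobiSVD, while the aspect-ratio constant defined just below controls the QR-first path. A short sketch of the first knob, with hypothetical sizes; per the assertion above, the switch size must be at least 3.

    // Inputs with fewer than m_algoswap columns never enter the
    // divide-and-conquer recursion; they go straight to the small JacobiSVD.
    Eigen::BDCSVD<Eigen::MatrixXd, Eigen::ComputeThinU | Eigen::ComputeThinV> svd(200, 100);
    svd.setSwitchSize(32);  // default is 16
    svd.compute(Eigen::MatrixXd::Random(200, 100));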
+ constexpr Index kMinAspectRatio = 4; + constexpr bool disableQrDecomp = static_cast(QRDecomposition) == static_cast(DisableQRDecomposition); + m_useQrDecomp = !disableQrDecomp && ((rows / kMinAspectRatio > cols) || (cols / kMinAspectRatio > rows)); + if (m_useQrDecomp) { + qrDecomp = HouseholderQR((std::max)(rows, cols), (std::min)(rows, cols)); + reducedTriangle = MatrixX(m_diagSize, m_diagSize); + } + + copyWorkspace = MatrixX(m_isTranspose ? cols : rows, m_isTranspose ? rows : cols); + bid = internal::UpperBidiagonalization(m_useQrDecomp ? m_diagSize : copyWorkspace.rows(), + m_useQrDecomp ? m_diagSize : copyWorkspace.cols()); + if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 ); else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 ); - + if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize); - + m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3); m_workspaceI.resize(3*m_diagSize); -}// end allocate +} // end allocate -template -BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsigned int computationOptions) -{ +template +BDCSVD& BDCSVD::compute_impl(const MatrixType& matrix, + unsigned int computationOptions) { #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "\n\n\n======================================================================================================================\n\n\n"; #endif - allocate(matrix.rows(), matrix.cols(), computationOptions); using std::abs; + allocate(matrix.rows(), matrix.cols(), computationOptions); + const RealScalar considerZero = (std::numeric_limits::min)(); - + //**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return if(matrix.cols() < m_algoswap) { - // FIXME this line involves temporaries - JacobiSVD jsvd(matrix,computationOptions); + smallSvd.compute(matrix); m_isInitialized = true; - m_info = jsvd.info(); + m_info = smallSvd.info(); if (m_info == Success || m_info == NoConvergence) { - if(computeU()) m_matrixU = jsvd.matrixU(); - if(computeV()) m_matrixV = jsvd.matrixV(); - m_singularValues = jsvd.singularValues(); - m_nonzeroSingularValues = jsvd.nonzeroSingularValues(); + if (computeU()) m_matrixU = smallSvd.matrixU(); + if (computeV()) m_matrixV = smallSvd.matrixV(); + m_singularValues = smallSvd.singularValues(); + m_nonzeroSingularValues = smallSvd.nonzeroSingularValues(); } return *this; } - + //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().template maxCoeff(); if (!(numext::isfinite)(scale)) { @@ -276,14 +358,23 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign return *this; } - if(scale==Literal(0)) scale = Literal(1); - MatrixX copy; - if (m_isTranspose) copy = matrix.adjoint()/scale; - else copy = matrix/scale; - - //**** step 1 - Bidiagonalization - // FIXME this line involves temporaries - internal::UpperBidiagonalization bid(copy); + if(numext::is_exactly_zero(scale)) scale = Literal(1); + + if (m_isTranspose) copyWorkspace = matrix.adjoint() / scale; + else copyWorkspace = matrix / scale; + + //**** step 1 - Bidiagonalization. + // If the problem is sufficiently rectangular, we perform R-Bidiagonalization: compute A = Q(R/0) + // and then bidiagonalize R. Otherwise, if the problem is relatively square, we + // bidiagonalize the input matrix directly. 
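The branch that follows implements exactly this comment. As a standalone sketch of the same idea in terms of public decompositions (sizes illustrative): for a tall A, factor A = Q(R/0), take the SVD of the small triangle R = U S V^T, and recover the left singular vectors of A as Q U.

    Eigen::MatrixXd A = Eigen::MatrixXd::Random(1000, 40);
    Eigen::HouseholderQR<Eigen::MatrixXd> qr(A);
    // Dense copy of the 40x40 upper triangle R (entries below it are zeroed).
    Eigen::MatrixXd R = qr.matrixQR().topRows(40).triangularView<Eigen::Upper>();
    Eigen::BDCSVD<Eigen::MatrixXd, Eigen::ComputeThinU | Eigen::ComputeThinV> svdR(R);
    // Thin Q is 1000x40; the left singular vectors of A are Q * U_R.
    Eigen::MatrixXd thinQ = qr.householderQ() * Eigen::MatrixXd::Identity(1000, 40);
    Eigen::MatrixXd U = thinQ * svdR.matrixU();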
+ if (m_useQrDecomp) { + qrDecomp.compute(copyWorkspace); + reducedTriangle = qrDecomp.matrixQR().topRows(m_diagSize); + reducedTriangle.template triangularView().setZero(); + bid.compute(reducedTriangle); + } else { + bid.compute(copyWorkspace); + } //**** step 2 - Divide & Conquer m_naiveU.setZero(); @@ -296,7 +387,7 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign m_isInitialized = true; return *this; } - + //**** step 3 - Copy singular values and vectors for (int i=0; i& BDCSVD::compute(const MatrixType& matrix, unsign } } -#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE -// std::cout << "m_naiveU\n" << m_naiveU << "\n\n"; -// std::cout << "m_naiveV\n" << m_naiveV << "\n\n"; -#endif + //**** step 4 - Finalize unitaries U and V if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU); else copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV); + if (m_useQrDecomp) { + if (m_isTranspose && computeV()) m_matrixV.applyOnTheLeft(qrDecomp.householderQ()); + else if (!m_isTranspose && computeU()) m_matrixU.applyOnTheLeft(qrDecomp.householderQ()); + } + m_isInitialized = true; return *this; -}// end compute +} // end compute - -template -template -void BDCSVD::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV) -{ +template +template +void BDCSVD::copyUV(const HouseholderU& householderU, const HouseholderV& householderV, + const NaiveU& naiveU, const NaiveV& naiveV) { // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa if (computeU()) { - Index Ucols = m_computeThinU ? m_diagSize : householderU.cols(); - m_matrixU = MatrixX::Identity(householderU.cols(), Ucols); + Index Ucols = m_computeThinU ? m_diagSize : rows(); + m_matrixU = MatrixX::Identity(rows(), Ucols); m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast().topLeftCorner(m_diagSize, m_diagSize); - householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer + // FIXME the following conditionals involve temporary buffers + if (m_useQrDecomp) m_matrixU.topLeftCorner(householderU.cols(), m_diagSize).applyOnTheLeft(householderU); + else m_matrixU.applyOnTheLeft(householderU); } if (computeV()) { - Index Vcols = m_computeThinV ? m_diagSize : householderV.cols(); - m_matrixV = MatrixX::Identity(householderV.cols(), Vcols); + Index Vcols = m_computeThinV ? m_diagSize : cols(); + m_matrixV = MatrixX::Identity(cols(), Vcols); m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast().topLeftCorner(m_diagSize, m_diagSize); - householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer + // FIXME the following conditionals involve temporary buffers + if (m_useQrDecomp) m_matrixV.topLeftCorner(householderV.cols(), m_diagSize).applyOnTheLeft(householderV); + else m_matrixV.applyOnTheLeft(householderV); } } @@ -356,9 +452,8 @@ void BDCSVD::copyUV(const HouseholderU &householderU, const Househol * We can thus pack them prior to the the matrix product. However, this is only worth the effort if the matrix is large * enough. 
*/ -template -void BDCSVD::structured_update(Block A, const MatrixXr &B, Index n1) -{ +template +void BDCSVD::structured_update(Block A, const MatrixXr& B, Index n1) { Index n = A.rows(); if(n>100) { @@ -385,7 +480,7 @@ void BDCSVD::structured_update(Block A, co ++k2; } } - + A.topRows(n1).noalias() = A1.leftCols(k1) * B1.topRows(k1); A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2); } @@ -397,19 +492,37 @@ void BDCSVD::structured_update(Block A, co } } -// The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide methods takes as argument the -// place of the submatrix we are currently working on. +template +template +void BDCSVD::computeBaseCase(SVDType& svd, Index n, Index firstCol, Index firstRowW, + Index firstColW, Index shift) { + svd.compute(m_computed.block(firstCol, firstCol, n + 1, n)); + m_info = svd.info(); + if (m_info != Success && m_info != NoConvergence) return; + if (m_compU) + m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = svd.matrixU(); + else { + m_naiveU.row(0).segment(firstCol, n + 1).real() = svd.matrixU().row(0); + m_naiveU.row(1).segment(firstCol, n + 1).real() = svd.matrixU().row(n); + } + if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = svd.matrixV(); + m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero(); + m_computed.diagonal().segment(firstCol + shift, n) = svd.singularValues().head(n); +} + +// The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide methods +// takes as argument the place of the submatrix we are currently working on. //@param firstCol : The Index of the first column of the submatrix of m_computed and for m_naiveU; -//@param lastCol : The Index of the last column of the submatrix of m_computed and for m_naiveU; +//@param lastCol : The Index of the last column of the submatrix of m_computed and for m_naiveU; // lastCol + 1 - firstCol is the size of the submatrix. //@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper section 1 for more information on W) -//@param firstRowW : Same as firstRowW with the column. -//@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix +//@param firstColW : Same as firstRowW with the column. +//@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix // to become the first column (*coeff) and to shift all the other columns to the right. There are more details on the reference paper. -template -void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift) -{ +template +void BDCSVD::divide(Index firstCol, Index lastCol, Index firstRowW, + Index firstColW, Index shift) { // requires rows = cols + 1; using std::pow; using std::sqrt; @@ -418,36 +531,30 @@ void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eig const Index k = n/2; const RealScalar considerZero = (std::numeric_limits::min)(); RealScalar alphaK; - RealScalar betaK; - RealScalar r0; + RealScalar betaK; + RealScalar r0; RealScalar lambda, phi, c0, s0; VectorType l, f; - // We use the other algorithm which is more efficient for small + // We use the other algorithm which is more efficient for small // matrices. 
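Before the small-matrix branch that follows, a simplified analogue of the packing trick used by structured_update above: columns of A that are numerically zero contribute nothing to A*B, so pack the surviving columns of A together with the matching rows of B and run the product on the smaller dense blocks. The real code additionally splits A into top and bottom halves; here `eps` stands in for its considerZero threshold.

    Eigen::MatrixXd packedProduct(const Eigen::MatrixXd& A, const Eigen::MatrixXd& B, double eps) {
      Eigen::Index k = 0;
      Eigen::MatrixXd A1(A.rows(), A.cols());
      Eigen::MatrixXd B1(B.rows(), B.cols());
      for (Eigen::Index j = 0; j < A.cols(); ++j) {
        if (A.col(j).cwiseAbs().maxCoeff() > eps) {  // keep only non-negligible columns
          A1.col(k) = A.col(j);
          B1.row(k) = B.row(j);
          ++k;
        }
      }
      return A1.leftCols(k) * B1.topRows(k);  // equals A * B up to the dropped columns
    }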
if (n < m_algoswap) { - // FIXME this line involves temporaries - JacobiSVD b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)); - m_info = b.info(); - if (m_info != Success && m_info != NoConvergence) return; - if (m_compU) - m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU(); - else - { - m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0); - m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n); + // FIXME this block involves temporaries + if (m_compV) { + JacobiSVD baseSvd; + computeBaseCase(baseSvd, n, firstCol, firstRowW, firstColW, shift); + } else { + JacobiSVD baseSvd; + computeBaseCase(baseSvd, n, firstCol, firstRowW, firstColW, shift); } - if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV(); - m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero(); - m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n); return; } // We use the divide and conquer algorithm alphaK = m_computed(firstCol + k, firstCol + k); betaK = m_computed(firstCol + k + 1, firstCol + k); // The divide must be done in that order in order to have good results. Divide change the data inside the submatrices - // and the divide of the right submatrice reads one column of the left submatrice. That's why we need to treat the - // right submatrix before the left one. + // and the divide of the right submatrice reads one column of the left submatrice. That's why we need to treat the + // right submatrix before the left one. divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift); if (m_info != Success && m_info != NoConvergence) return; divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1); @@ -457,8 +564,8 @@ void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eig { lambda = m_naiveU(firstCol + k, firstCol + k); phi = m_naiveU(firstCol + k + 1, lastCol + 1); - } - else + } + else { lambda = m_naiveU(1, firstCol + k); phi = m_naiveU(0, lastCol + 1); @@ -468,8 +575,8 @@ void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eig { l = m_naiveU.row(firstCol + k).segment(firstCol, k); f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1); - } - else + } + else { l = m_naiveU.row(1).segment(firstCol, k); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); @@ -485,52 +592,52 @@ void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eig c0 = alphaK * lambda / r0; s0 = betaK * phi / r0; } - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(m_naiveU.allFinite()); - assert(m_naiveV.allFinite()); - assert(m_computed.allFinite()); + eigen_internal_assert(m_naiveU.allFinite()); + eigen_internal_assert(m_naiveV.allFinite()); + eigen_internal_assert(m_computed.allFinite()); #endif - + if (m_compU) { - MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1)); + MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1)); // we shiftW Q1 to the right - for (Index i = firstCol + k - 1; i >= firstCol; i--) + for (Index i = firstCol + k - 1; i >= firstCol; i--) m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1); // we shift q1 at the left with a factor c0 m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0); // last column = q1 * - s0 m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0)); // first column = q2 * s0 - m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol 
+ k + 1, n - k) * s0; + m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0; // q2 *= c0 m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0; - } - else + } + else { RealScalar q1 = m_naiveU(0, firstCol + k); // we shift Q1 to the right - for (Index i = firstCol + k - 1; i >= firstCol; i--) + for (Index i = firstCol + k - 1; i >= firstCol; i--) m_naiveU(0, i + 1) = m_naiveU(0, i); // we shift q1 at the left with a factor c0 m_naiveU(0, firstCol) = (q1 * c0); // last column = q1 * - s0 m_naiveU(0, lastCol + 1) = (q1 * ( - s0)); // first column = q2 * s0 - m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0; + m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0; // q2 *= c0 m_naiveU(1, lastCol + 1) *= c0; m_naiveU.row(1).segment(firstCol + 1, k).setZero(); m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero(); } - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(m_naiveU.allFinite()); - assert(m_naiveV.allFinite()); - assert(m_computed.allFinite()); + eigen_internal_assert(m_naiveU.allFinite()); + eigen_internal_assert(m_naiveV.allFinite()); + eigen_internal_assert(m_computed.allFinite()); #endif - + m_computed(firstCol + shift, firstCol + shift) = r0; m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real(); m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real(); @@ -547,21 +654,21 @@ void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eig std::cout << "err: " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n"; static int count = 0; std::cout << "# " << ++count << "\n\n"; - assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm()); -// assert(count<681); -// assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all()); + eigen_internal_assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm()); +// eigen_internal_assert(count<681); +// eigen_internal_assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all()); #endif - + // Third part: compute SVD of combined matrix MatrixXr UofSVD, VofSVD; VectorType singVals; computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD); - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(UofSVD.allFinite()); - assert(VofSVD.allFinite()); + eigen_internal_assert(UofSVD.allFinite()); + eigen_internal_assert(VofSVD.allFinite()); #endif - + if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2); else @@ -570,18 +677,18 @@ void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eig tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD; m_naiveU.middleCols(firstCol, n + 1) = tmp; } - + if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2); - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(m_naiveU.allFinite()); - assert(m_naiveV.allFinite()); - assert(m_computed.allFinite()); + eigen_internal_assert(m_naiveU.allFinite()); + eigen_internal_assert(m_naiveV.allFinite()); + eigen_internal_assert(m_computed.allFinite()); #endif - + m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero(); m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals; -}// end divide +} // end divide // Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in // the first column and on the diagonal and has undergone deflation, so diagonal is in increasing @@ -591,9 +698,9 @@ void BDCSVD::divide(Eigen::Index firstCol, Eigen::Index 
lastCol, Eig // TODO Opportunities for optimization: better root finding algo, better stopping criterion, better // handling of round-off errors, be consistent in ordering // For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf -template -void BDCSVD::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) -{ +template +void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, + VectorType& singVals, MatrixXr& V) { const RealScalar considerZero = (std::numeric_limits::min)(); using std::abs; ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); @@ -610,18 +717,21 @@ void BDCSVD::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, Ma if (col0.hasNaN() || diag.hasNaN()) std::cout << "\n\nHAS NAN\n\n"; #endif - + // Many singular values might have been deflated, the zero ones have been moved to the end, // but others are interleaved and we must ignore them at this stage. // To this end, let's compute a permutation skipping them: Index actual_n = n; - while(actual_n>1 && diag(actual_n-1)==Literal(0)) {--actual_n; eigen_internal_assert(col0(actual_n)==Literal(0)); } + while(actual_n>1 && numext::is_exactly_zero(diag(actual_n - 1))) { + --actual_n; + eigen_internal_assert(numext::is_exactly_zero(col0(actual_n))); + } Index m = 0; // size of the deflated problem for(Index k=0;kconsiderZero) m_workspaceI(m++) = k; Map perm(m_workspaceI.data(),m); - + Map shifts(m_workspace.data()+1*n, n); Map mus(m_workspace.data()+2*n, n); Map zhat(m_workspace.data()+3*n, n); @@ -631,58 +741,58 @@ void BDCSVD::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, Ma std::cout << " z: " << col0.transpose() << "\n"; std::cout << " d: " << diag.transpose() << "\n"; #endif - + // Compute singVals, shifts, and mus computeSingVals(col0, diag, perm, singVals, shifts, mus); - + #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << " j: " << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << "\n\n"; std::cout << " sing-val: " << singVals.transpose() << "\n"; std::cout << " mu: " << mus.transpose() << "\n"; std::cout << " shift: " << shifts.transpose() << "\n"; - + { std::cout << "\n\n mus: " << mus.head(actual_n).transpose() << "\n\n"; std::cout << " check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n"; - assert((((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n) >= 0).all()); + eigen_internal_assert((((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n) >= 0).all()); std::cout << " check2 (>0) : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n"; - assert((((singVals.array()-diag) / singVals.array()).head(actual_n) >= 0).all()); + eigen_internal_assert((((singVals.array()-diag) / singVals.array()).head(actual_n) >= 0).all()); } #endif - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(singVals.allFinite()); - assert(mus.allFinite()); - assert(shifts.allFinite()); + eigen_internal_assert(singVals.allFinite()); + eigen_internal_assert(mus.allFinite()); + eigen_internal_assert(shifts.allFinite()); #endif - + // Compute zhat perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << " zhat: " << zhat.transpose() << "\n"; #endif - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(zhat.allFinite()); + eigen_internal_assert(zhat.allFinite()); #endif - + computeSingVecs(zhat, 
diag, perm, singVals, shifts, mus, U, V); - + #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "U^T U: " << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << "\n"; std::cout << "V^T V: " << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << "\n"; #endif - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(m_naiveU.allFinite()); - assert(m_naiveV.allFinite()); - assert(m_computed.allFinite()); - assert(U.allFinite()); - assert(V.allFinite()); -// assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 100*NumTraits::epsilon() * n); -// assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 100*NumTraits::epsilon() * n); + eigen_internal_assert(m_naiveU.allFinite()); + eigen_internal_assert(m_naiveV.allFinite()); + eigen_internal_assert(m_computed.allFinite()); + eigen_internal_assert(U.allFinite()); + eigen_internal_assert(V.allFinite()); +// eigen_internal_assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 100*NumTraits::epsilon() * n); +// eigen_internal_assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 100*NumTraits::epsilon() * n); #endif - + // Because of deflation, the singular values might not be completely sorted. // Fortunately, reordering them is a O(n) problem for(Index i=0; i::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, Ma bool singular_values_sorted = (((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).array() >= 0).all(); if(!singular_values_sorted) std::cout << "Singular values are not sorted: " << singVals.segment(1,actual_n).transpose() << "\n"; - assert(singular_values_sorted); + eigen_internal_assert(singular_values_sorted); } #endif - + // Reverse order so that singular values in increased order // Because of deflation, the zeros singular-values are already at the end singVals.head(actual_n).reverseInPlace(); U.leftCols(actual_n).rowwise().reverseInPlace(); if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace(); - + #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE JacobiSVD jsvd(m_computed.block(firstCol, firstCol, n, n) ); std::cout << " * j: " << jsvd.singularValues().transpose() << "\n\n"; @@ -719,9 +829,10 @@ void BDCSVD::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, Ma #endif } -template -typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) -{ +template +typename BDCSVD::RealScalar BDCSVD::secularEq( + RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const ArrayRef& diagShifted, + RealScalar shift) { Index m = perm.size(); RealScalar res = Literal(1); for(Index i=0; i::RealScalar BDCSVD::secularEq(RealScalar res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu)); } return res; - } -template -void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, - VectorType& singVals, ArrayRef shifts, ArrayRef mus) -{ +template +void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, + VectorType& singVals, ArrayRef shifts, ArrayRef mus) { using std::abs; using std::swap; using std::sqrt; @@ -747,11 +856,11 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d Index actual_n = n; // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above // because 1) we have diag(i)==0 => 
col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value. - while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; + while(actual_n>1 && numext::is_exactly_zero(col0(actual_n - 1))) --actual_n; for (Index k = 0; k < n; ++k) { - if (col0(k) == Literal(0) || actual_n==1) + if (numext::is_exactly_zero(col0(k)) || actual_n == 1) { // if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if actual_n==1, then the deflated problem is already diagonalized @@ -759,7 +868,7 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d mus(k) = Literal(0); shifts(k) = k==0 ? col0(0) : diag(k); continue; - } + } // otherwise, use secular equation to find singular value RealScalar left = diag(k); @@ -772,7 +881,7 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside. // This should be equivalent to using perm[] Index l = k+1; - while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l::computeSingVals(const ArrayRef& col0, const ArrayRef& d << " " << secularEq(left+RealScalar(0.999999)*(right-left), col0, diag, perm, diag, 0) << "\n"; #endif RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right; - + // measure everything relative to shift Map diagShifted(m_workspace.data()+4*n, n); diagShifted = diag - shift; @@ -807,7 +916,8 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d { // check that after the shift, f(mid) is still negative: RealScalar midShifted = (right - left) / RealScalar(2); - if(shift==right) + // we can test exact equality here, because shift comes from `... ? left : right` + if(numext::equal_strict(shift, right)) midShifted = -midShifted; RealScalar fMidShifted = secularEq(midShifted, col0, diag, perm, diagShifted, shift); if(fMidShifted>0) @@ -817,10 +927,11 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d diagShifted = diag - shift; } } - + // initial guess RealScalar muPrev, muCur; - if (shift == left) + // we can test exact equality here, because shift comes from `... ? 
left : right` + if (numext::equal_strict(shift, left)) { muPrev = (right - left) * RealScalar(0.1); if (k == actual_n-1) muCur = right - left; @@ -843,7 +954,7 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate bool useBisection = fPrev*fCur>Literal(0); - while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) + while (!numext::is_exactly_zero(fCur) && abs(muCur - muPrev) > Literal(8) * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev) > NumTraits::epsilon() && !useBisection) { ++m_numIters; @@ -855,16 +966,17 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift); #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert((numext::isfinite)(fZero)); + eigen_internal_assert((numext::isfinite)(fZero)); #endif - + muPrev = muCur; fPrev = fCur; muCur = muZero; fCur = fZero; - - if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; + + // we can test exact equality here, because shift comes from `... ? left : right` + if (numext::equal_strict(shift, left) && (muCur < Literal(0) || muCur > right - left)) useBisection = true; + if (numext::equal_strict(shift, right) && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true; } @@ -875,7 +987,8 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << "useBisection for k = " << k << ", actual_n = " << actual_n << "\n"; #endif RealScalar leftShifted, rightShifted; - if (shift == left) + // we can test exact equality here, because shift comes from `... ? left : right` + if (numext::equal_strict(shift, left)) { // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)), // the factor 2 is to be more conservative @@ -899,20 +1012,20 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); eigen_internal_assert(fLeft::computeSingVals(const ArrayRef& col0, const ArrayRef& d } muCur = (leftShifted + rightShifted) / Literal(2); } - else + else { // We have a problem as shifting on the left or right give either a positive or negative value // at the middle of [left,right]... // Instead fo abbording or entering an infinite loop, // let's just use the middle as the estimated zero-crossing: muCur = (right - left) * RealScalar(0.5); - if(shift == right) + // we can test exact equality here, because shift comes from `... ? left : right` + if(numext::equal_strict(shift, right)) muCur = -muCur; } } - + singVals[k] = shift + muCur; shifts[k] = shift; mus[k] = muCur; @@ -967,25 +1081,23 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << "found " << singVals[k] << " == " << shift << " + " << muCur << " from " << diag(k) << " .. 
" << diag(k+1) << "\n"; #endif #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(k==0 || singVals[k]>=singVals[k-1]); - assert(singVals[k]>=diag(k)); + eigen_internal_assert(k==0 || singVals[k]>=singVals[k-1]); + eigen_internal_assert(singVals[k]>=diag(k)); #endif // perturb singular value slightly if it equals diagonal entry to avoid division by zero later // (deflation is supposed to avoid this from happening) // - this does no seem to be necessary anymore - -// if (singVals[k] == left) singVals[k] *= 1 + NumTraits::epsilon(); -// if (singVals[k] == right) singVals[k] *= 1 - NumTraits::epsilon(); + // if (singVals[k] == left) singVals[k] *= 1 + NumTraits::epsilon(); + // if (singVals[k] == right) singVals[k] *= 1 - NumTraits::epsilon(); } } - // zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1) -template -void BDCSVD::perturbCol0 - (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals, - const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat) -{ +template +void BDCSVD::perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, + const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, + ArrayRef zhat) { using std::sqrt; Index n = col0.size(); Index m = perm.size(); @@ -998,7 +1110,7 @@ void BDCSVD::perturbCol0 // The offset permits to skip deflated entries while computing zhat for (Index k = 0; k < n; ++k) { - if (col0(k) == Literal(0)) // deflated + if (numext::is_exactly_zero(col0(k))) // deflated zhat(k) = Literal(0); else { @@ -1011,7 +1123,7 @@ void BDCSVD::perturbCol0 std::cout << "prod = " << "(" << singVals(lastIdx) << " + " << dk << ") * (" << mus(lastIdx) << " + (" << shifts(lastIdx) << " - " << dk << "))" << "\n"; std::cout << " = " << singVals(lastIdx) + dk << " * " << mus(lastIdx) + (shifts(lastIdx) - dk) << "\n"; } - assert(prod>=0); + eigen_internal_assert(prod>=0); #endif for(Index l = 0; l::perturbCol0 std::cout << " " << "j=" << j << "\n"; } #endif - Index j = i= k && l == 0) { + m_info = NumericalIssue; + prod = 0; + break; + } + Index j = i 0 ? perm(l-1) : i; #ifdef EIGEN_BDCSVD_SANITY_CHECKS if(!(dk!=Literal(0) || diag(i)!=Literal(0))) { std::cout << "k=" << k << ", i=" << i << ", l=" << l << ", perm.size()=" << perm.size() << "\n"; } - assert(dk!=Literal(0) || diag(i)!=Literal(0)); + eigen_internal_assert(dk!=Literal(0) || diag(i)!=Literal(0)); #endif prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk))); #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(prod>=0); + eigen_internal_assert(prod>=0); #endif #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE if(i!=k && numext::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 ) @@ -1053,7 +1172,7 @@ void BDCSVD::perturbCol0 #endif RealScalar tmp = sqrt(prod); #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert((numext::isfinite)(tmp)); + eigen_internal_assert((numext::isfinite)(tmp)); #endif zhat(k) = col0(k) > Literal(0) ? 
RealScalar(tmp) : RealScalar(-tmp); } @@ -1061,17 +1180,16 @@ void BDCSVD::perturbCol0 } // compute singular vectors -template -void BDCSVD::computeSingVecs - (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals, - const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V) -{ +template +void BDCSVD::computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, + const VectorType& singVals, const ArrayRef& shifts, + const ArrayRef& mus, MatrixXr& U, MatrixXr& V) { Index n = zhat.size(); Index m = perm.size(); - + for (Index k = 0; k < n; ++k) { - if (zhat(k) == Literal(0)) + if (numext::is_exactly_zero(zhat(k))) { U.col(k) = VectorType::Unit(n+1, k); if (m_compV) V.col(k) = VectorType::Unit(n, k); @@ -1086,7 +1204,7 @@ void BDCSVD::computeSingVecs } U(n,k) = Literal(0); U.col(k).normalize(); - + if (m_compV) { V.col(k).setZero(); @@ -1103,13 +1221,12 @@ void BDCSVD::computeSingVecs U.col(n) = VectorType::Unit(n+1, n); } - // page 12_13 // i >= 1, di almost null and zi non null. // We use a rotation to zero out zi applied to the left of M -template -void BDCSVD::deflation43(Eigen::Index firstCol, Eigen::Index shift, Eigen::Index i, Eigen::Index size) -{ +template +void BDCSVD::deflation43(Index firstCol, Index shift, Index i, + Index size) { using std::abs; using std::sqrt; using std::pow; @@ -1117,28 +1234,28 @@ void BDCSVD::deflation43(Eigen::Index firstCol, Eigen::Index shift, RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); RealScalar r = numext::hypot(c,s); - if (r == Literal(0)) + if (numext::is_exactly_zero(r)) { m_computed(start+i, start+i) = Literal(0); return; } - m_computed(start,start) = r; + m_computed(start,start) = r; m_computed(start+i, start) = Literal(0); m_computed(start+i, start+i) = Literal(0); - + JacobiRotation J(c/r,-s/r); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); else m_naiveU.applyOnTheRight(firstCol, firstCol+i, J); -}// end deflation 43 - +} // end deflation 43 // page 13 // i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M) // We apply two rotations to have zj = 0; // TODO deflation44 is still broken and not properly tested -template -void BDCSVD::deflation44(Eigen::Index firstColu , Eigen::Index firstColm, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index i, Eigen::Index j, Eigen::Index size) -{ +template +void BDCSVD::deflation44(Index firstColu, Index firstColm, Index firstRowW, + Index firstColW, Index i, Index j, + Index size) { using std::abs; using std::sqrt; using std::conj; @@ -1157,7 +1274,7 @@ void BDCSVD::deflation44(Eigen::Index firstColu , Eigen::Index first << m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; #endif - if (r==Literal(0)) + if (numext::is_exactly_zero(r)) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); return; @@ -1172,39 +1289,38 @@ void BDCSVD::deflation44(Eigen::Index firstColu , Eigen::Index first if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); else m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J); if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J); -}// end deflation 44 - +} // end deflation 44 // acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive] -template -void BDCSVD::deflation(Eigen::Index firstCol, Eigen::Index 
lastCol, Eigen::Index k, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift) -{ +template +void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, + Index firstRowW, Index firstColW, Index shift) { using std::sqrt; using std::abs; const Index length = lastCol + 1 - firstCol; - + Block col0(m_computed, firstCol+shift, firstCol+shift, length, 1); Diagonal fulldiag(m_computed); VectorBlock,Dynamic> diag(fulldiag, firstCol+shift, length); - + const RealScalar considerZero = (std::numeric_limits::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = numext::maxi(considerZero,NumTraits::epsilon() * maxDiag); RealScalar epsilon_coarse = Literal(8) * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(m_naiveU.allFinite()); - assert(m_naiveV.allFinite()); - assert(m_computed.allFinite()); + eigen_internal_assert(m_naiveU.allFinite()); + eigen_internal_assert(m_naiveV.allFinite()); + eigen_internal_assert(m_computed.allFinite()); #endif -#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "\ndeflate:" << diag.head(k+1).transpose() << " | " << diag.segment(k+1,length-k-1).transpose() << "\n"; #endif - + //condition 4.1 if (diag(0) < epsilon_coarse) - { + { #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon_coarse << "\n"; #endif @@ -1232,31 +1348,31 @@ void BDCSVD::deflation(Eigen::Index firstCol, Eigen::Index lastCol, } #ifdef EIGEN_BDCSVD_SANITY_CHECKS - assert(m_naiveU.allFinite()); - assert(m_naiveV.allFinite()); - assert(m_computed.allFinite()); + eigen_internal_assert(m_naiveU.allFinite()); + eigen_internal_assert(m_naiveV.allFinite()); + eigen_internal_assert(m_computed.allFinite()); #endif #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "to be sorted: " << diag.transpose() << "\n\n"; std::cout << " : " << col0.transpose() << "\n\n"; #endif { - // Check for total deflation - // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting - bool total_deflation = (col0.tail(length-1).array()::deflation(Eigen::Index firstCol, Eigen::Index lastCol, else permutation[p] = i++; } } - + // If we have a total deflation, then we have to insert diag(0) at the right place if(total_deflation) { @@ -1282,22 +1398,22 @@ void BDCSVD::deflation(Eigen::Index firstCol, Eigen::Index lastCol, } } } - + // Current index of each col, and current column of each index Index *realInd = m_workspaceI.data()+length; Index *realCol = m_workspaceI.data()+2*length; - + for(int pos = 0; pos< length; pos++) { realCol[pos] = pos; realInd[pos] = pos; } - + for(Index i = total_deflation?0:1; i < length; i++) { const Index pi = permutation[length - (total_deflation ? 
i+1 : i)]; const Index J = realCol[pi]; - + using std::swap; // swap diagonal and first column entries: swap(diag(i), diag(J)); @@ -1320,7 +1436,7 @@ void BDCSVD::deflation(Eigen::Index firstCol, Eigen::Index lastCol, std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n"; std::cout << " : " << col0.transpose() << "\n\n"; #endif - + //condition 4.4 { Index i = length-1; @@ -1335,18 +1451,18 @@ void BDCSVD::deflation(Eigen::Index firstCol, Eigen::Index lastCol, deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length); } } - + #ifdef EIGEN_BDCSVD_SANITY_CHECKS for(Index j=2;j::deflation(Eigen::Index firstCol, Eigen::Index lastCol, * * \sa class BDCSVD */ -template -BDCSVD::PlainObject> -MatrixBase::bdcSvd(unsigned int computationOptions) const -{ - return BDCSVD(*this, computationOptions); +template +template +BDCSVD::PlainObject, Options> MatrixBase::bdcSvd() const { + return BDCSVD(*this); +} + +/** \svd_module + * + * \return the singular value decomposition of \c *this computed by Divide & Conquer algorithm + * + * \sa class BDCSVD + */ +template +template +BDCSVD::PlainObject, Options> MatrixBase::bdcSvd( + unsigned int computationOptions) const { + return BDCSVD(*this, computationOptions); } } // end namespace Eigen diff --git a/libs/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h b/libs/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h new file mode 100644 index 0000000..d4cc173 --- /dev/null +++ b/libs/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h @@ -0,0 +1,163 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2022 Melven Roehrig-Zoellner +// Copyright (c) 2011, Intel Corporation. All rights reserved. +// +// This file is based on the JacobiSVD_LAPACKE.h originally from Intel - +// see license notice below: +/* + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + ******************************************************************************** + * Content : Eigen bindings to LAPACKe + * Singular Value Decomposition - SVD (divide and conquer variant) + ******************************************************************************** +*/ +#ifndef EIGEN_BDCSVD_LAPACKE_H +#define EIGEN_BDCSVD_LAPACKE_H + +namespace Eigen { + +namespace internal { + +namespace lapacke_helpers { + +/** \internal Specialization for the data types supported by LAPACKe */ + +// defining a derived class to allow access to protected members +template +class BDCSVD_LAPACKE : public BDCSVD { + typedef BDCSVD SVD; + typedef typename SVD::MatrixType MatrixType; + typedef typename SVD::Scalar Scalar; + typedef typename SVD::RealScalar RealScalar; + +public: + // construct this by moving from a parent object + BDCSVD_LAPACKE(SVD&& svd) : SVD(std::move(svd)) {} + + void compute_impl_lapacke(const MatrixType& matrix, unsigned int computationOptions) { + + SVD::allocate(matrix.rows(), matrix.cols(), computationOptions); + + SVD::m_nonzeroSingularValues = SVD::m_diagSize; + + // prepare arguments to ?gesdd + const lapack_int matrix_order = lapack_storage_of(matrix); + const char jobz = (SVD::m_computeFullU || SVD::m_computeFullV) ? 'A' : (SVD::m_computeThinU || SVD::m_computeThinV) ? 'S' : 'N'; + const lapack_int u_cols = (jobz == 'A') ? to_lapack(SVD::m_rows) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1; + const lapack_int vt_rows = (jobz == 'A') ? to_lapack(SVD::m_cols) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1; + lapack_int ldu, ldvt; + Scalar *u, *vt, dummy; + MatrixType localU; + if (SVD::computeU() && !(SVD::m_computeThinU && SVD::m_computeFullV) ) { + ldu = to_lapack(SVD::m_matrixU.outerStride()); + u = SVD::m_matrixU.data(); + } else if (SVD::computeV()) { + localU.resize(SVD::m_rows, u_cols); + ldu = to_lapack(localU.outerStride()); + u = localU.data(); + } else { ldu=1; u=&dummy; } + MatrixType localV; + if (SVD::computeU() || SVD::computeV()) { + localV.resize(vt_rows, SVD::m_cols); + ldvt = to_lapack(localV.outerStride()); + vt = localV.data(); + } else { ldvt=1; vt=&dummy; } + MatrixType temp; temp = matrix; + + // actual call to ?gesdd + lapack_int info = gesdd( matrix_order, jobz, to_lapack(SVD::m_rows), to_lapack(SVD::m_cols), + to_lapack(temp.data()), to_lapack(temp.outerStride()), (RealScalar*)SVD::m_singularValues.data(), + to_lapack(u), ldu, to_lapack(vt), ldvt); + + // Check the result of the LAPACK call + if (info < 0 || !SVD::m_singularValues.allFinite()) { + // this includes info == -4 => NaN entry in A + SVD::m_info = InvalidInput; + } else if (info > 0 ) { + SVD::m_info = NoConvergence; + } else { + SVD::m_info = Success; + if (SVD::m_computeThinU && SVD::m_computeFullV) { + SVD::m_matrixU = localU.leftCols(SVD::m_matrixU.cols()); + } + if (SVD::computeV()) { + SVD::m_matrixV = localV.adjoint().leftCols(SVD::m_matrixV.cols()); + } + } + SVD::m_isInitialized = true; + } +}; + +template +BDCSVD& BDCSVD_wrapper(BDCSVD& svd, const MatrixType_& matrix, int computationOptions) +{ + // we need to move to the wrapper type and back + BDCSVD_LAPACKE tmpSvd(std::move(svd)); + tmpSvd.compute_impl_lapacke(matrix, computationOptions); + svd = std::move(tmpSvd); + return svd; +} + +} // end namespace lapacke_helpers + +} // end namespace internal + +#define EIGEN_LAPACKE_SDD(EIGTYPE, EIGCOLROW, OPTIONS) \ +template<> inline \ +BDCSVD, OPTIONS>& \ +BDCSVD, OPTIONS>::compute_impl(const Matrix& matrix, unsigned int computationOptions) {\ + return 
internal::lapacke_helpers::BDCSVD_wrapper(*this, matrix, computationOptions); \ +} + +#define EIGEN_LAPACK_SDD_OPTIONS(OPTIONS) \ + EIGEN_LAPACKE_SDD(double, ColMajor, OPTIONS) \ + EIGEN_LAPACKE_SDD(float, ColMajor, OPTIONS) \ + EIGEN_LAPACKE_SDD(dcomplex, ColMajor, OPTIONS) \ + EIGEN_LAPACKE_SDD(scomplex, ColMajor, OPTIONS) \ +\ + EIGEN_LAPACKE_SDD(double, RowMajor, OPTIONS) \ + EIGEN_LAPACKE_SDD(float, RowMajor, OPTIONS) \ + EIGEN_LAPACKE_SDD(dcomplex, RowMajor, OPTIONS) \ + EIGEN_LAPACKE_SDD(scomplex, RowMajor, OPTIONS) + +EIGEN_LAPACK_SDD_OPTIONS(0) +EIGEN_LAPACK_SDD_OPTIONS(ComputeThinU) +EIGEN_LAPACK_SDD_OPTIONS(ComputeThinV) +EIGEN_LAPACK_SDD_OPTIONS(ComputeFullU) +EIGEN_LAPACK_SDD_OPTIONS(ComputeFullV) +EIGEN_LAPACK_SDD_OPTIONS(ComputeThinU | ComputeThinV) +EIGEN_LAPACK_SDD_OPTIONS(ComputeFullU | ComputeFullV) +EIGEN_LAPACK_SDD_OPTIONS(ComputeThinU | ComputeFullV) +EIGEN_LAPACK_SDD_OPTIONS(ComputeFullU | ComputeThinV) + +#undef EIGEN_LAPACK_SDD_OPTIONS + +#undef EIGEN_LAPACKE_SDD + +} // end namespace Eigen + +#endif // EIGEN_BDCSVD_LAPACKE_H diff --git a/libs/eigen/Eigen/src/SVD/InternalHeaderCheck.h b/libs/eigen/Eigen/src/SVD/InternalHeaderCheck.h new file mode 100644 index 0000000..fa67b96 --- /dev/null +++ b/libs/eigen/Eigen/src/SVD/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_SVD_MODULE_H +#error "Please include Eigen/SVD instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/SVD/JacobiSVD.h b/libs/eigen/Eigen/src/SVD/JacobiSVD.h index 9d95acd..d7dc209 100644 --- a/libs/eigen/Eigen/src/SVD/JacobiSVD.h +++ b/libs/eigen/Eigen/src/SVD/JacobiSVD.h @@ -11,13 +11,15 @@ #ifndef EIGEN_JACOBISVD_H #define EIGEN_JACOBISVD_H -namespace Eigen { +#include "./InternalHeaderCheck.h" + +namespace Eigen { namespace internal { + // forward declaration (needed by ICC) // the empty body is required by MSVC -template::IsComplex> +template ::IsComplex> struct svd_precondition_2x2_block_to_be_real {}; /*** QR preconditioners (R-SVD) @@ -44,47 +46,40 @@ struct qr_preconditioner_should_do_anything }; }; -template::ret -> struct qr_preconditioner_impl {}; +template ::ret> +struct qr_preconditioner_impl {}; -template -class qr_preconditioner_impl -{ -public: - void allocate(const JacobiSVD&) {} - bool run(JacobiSVD&, const MatrixType&) - { - return false; - } +template +class qr_preconditioner_impl { + public: + void allocate(const JacobiSVD&) {} + bool run(JacobiSVD&, const MatrixType&) { return false; } }; /*** preconditioner using FullPivHouseholderQR ***/ -template -class qr_preconditioner_impl -{ -public: +template +class qr_preconditioner_impl { + public: typedef typename MatrixType::Scalar Scalar; - enum - { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime - }; - typedef Matrix WorkspaceType; + typedef JacobiSVD SVDType; - void allocate(const JacobiSVD& svd) - { + enum { WorkspaceSize = MatrixType::RowsAtCompileTime, MaxWorkspaceSize = MatrixType::MaxRowsAtCompileTime }; + + typedef Matrix WorkspaceType; + + void allocate(const SVDType& svd) { if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols()) { - m_qr.~QRType(); - ::new (&m_qr) QRType(svd.rows(), svd.cols()); + internal::destroy_at(&m_qr); + internal::construct_at(&m_qr, svd.rows(), svd.cols()); } if (svd.m_computeFullU) m_workspace.resize(svd.rows()); } - bool run(JacobiSVD& svd, const MatrixType& matrix) - { + bool run(SVDType& svd, const MatrixType& matrix) { if(matrix.rows() > matrix.cols()) { 
m_qr.compute(matrix); @@ -95,43 +90,43 @@ public: } return false; } + private: typedef FullPivHouseholderQR QRType; QRType m_qr; WorkspaceType m_workspace; }; -template -class qr_preconditioner_impl -{ -public: +template +class qr_preconditioner_impl { + public: typedef typename MatrixType::Scalar Scalar; - enum - { + typedef JacobiSVD SVDType; + + enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Options = MatrixType::Options + MatrixOptions = MatrixType::Options }; - typedef typename internal::make_proper_matrix_type< - Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime - >::type TransposeTypeWithSameStorageOrder; + typedef typename internal::make_proper_matrix_type::type + TransposeTypeWithSameStorageOrder; - void allocate(const JacobiSVD& svd) - { + void allocate(const SVDType& svd) { if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols()) { - m_qr.~QRType(); - ::new (&m_qr) QRType(svd.cols(), svd.rows()); + internal::destroy_at(&m_qr); + internal::construct_at(&m_qr, svd.cols(), svd.rows()); } m_adjoint.resize(svd.cols(), svd.rows()); if (svd.m_computeFullV) m_workspace.resize(svd.cols()); } - bool run(JacobiSVD& svd, const MatrixType& matrix) - { + bool run(SVDType& svd, const MatrixType& matrix) { if(matrix.cols() > matrix.rows()) { m_adjoint = matrix.adjoint(); @@ -143,32 +138,41 @@ public: } else return false; } + private: typedef FullPivHouseholderQR QRType; QRType m_qr; TransposeTypeWithSameStorageOrder m_adjoint; - typename internal::plain_row_type::type m_workspace; + typename plain_row_type::type m_workspace; }; /*** preconditioner using ColPivHouseholderQR ***/ -template -class qr_preconditioner_impl -{ -public: - void allocate(const JacobiSVD& svd) - { +template +class qr_preconditioner_impl { + public: + typedef typename MatrixType::Scalar Scalar; + typedef JacobiSVD SVDType; + + enum { + WorkspaceSize = internal::traits::MatrixUColsAtCompileTime, + MaxWorkspaceSize = internal::traits::MatrixUMaxColsAtCompileTime + }; + + typedef Matrix WorkspaceType; + + void allocate(const SVDType& svd) { if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols()) { - m_qr.~QRType(); - ::new (&m_qr) QRType(svd.rows(), svd.cols()); + internal::destroy_at(&m_qr); + internal::construct_at(&m_qr, svd.rows(), svd.cols()); } if (svd.m_computeFullU) m_workspace.resize(svd.rows()); else if (svd.m_computeThinU) m_workspace.resize(svd.cols()); } - bool run(JacobiSVD& svd, const MatrixType& matrix) - { + bool run(SVDType& svd, const MatrixType& matrix) { if(matrix.rows() > matrix.cols()) { m_qr.compute(matrix); @@ -188,41 +192,44 @@ public: private: typedef ColPivHouseholderQR QRType; QRType m_qr; - typename internal::plain_col_type::type m_workspace; + WorkspaceType m_workspace; }; -template -class qr_preconditioner_impl -{ -public: +template +class qr_preconditioner_impl { + public: typedef typename MatrixType::Scalar Scalar; - enum - { + typedef JacobiSVD SVDType; + + enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Options = MatrixType::Options + MatrixOptions = MatrixType::Options, + WorkspaceSize = internal::traits::MatrixVColsAtCompileTime, + MaxWorkspaceSize = 
internal::traits::MatrixVMaxColsAtCompileTime }; - typedef typename internal::make_proper_matrix_type< - Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime - >::type TransposeTypeWithSameStorageOrder; + typedef Matrix WorkspaceType; - void allocate(const JacobiSVD& svd) - { + typedef typename internal::make_proper_matrix_type::type + TransposeTypeWithSameStorageOrder; + + void allocate(const SVDType& svd) { if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols()) { - m_qr.~QRType(); - ::new (&m_qr) QRType(svd.cols(), svd.rows()); + internal::destroy_at(&m_qr); + internal::construct_at(&m_qr, svd.cols(), svd.rows()); } if (svd.m_computeFullV) m_workspace.resize(svd.cols()); else if (svd.m_computeThinV) m_workspace.resize(svd.rows()); m_adjoint.resize(svd.cols(), svd.rows()); } - bool run(JacobiSVD& svd, const MatrixType& matrix) - { + bool run(SVDType& svd, const MatrixType& matrix) { if(matrix.cols() > matrix.rows()) { m_adjoint = matrix.adjoint(); @@ -245,28 +252,35 @@ private: typedef ColPivHouseholderQR QRType; QRType m_qr; TransposeTypeWithSameStorageOrder m_adjoint; - typename internal::plain_row_type::type m_workspace; + WorkspaceType m_workspace; }; /*** preconditioner using HouseholderQR ***/ -template -class qr_preconditioner_impl -{ -public: - void allocate(const JacobiSVD& svd) - { +template +class qr_preconditioner_impl { + public: + typedef typename MatrixType::Scalar Scalar; + typedef JacobiSVD SVDType; + + enum { + WorkspaceSize = internal::traits::MatrixUColsAtCompileTime, + MaxWorkspaceSize = internal::traits::MatrixUMaxColsAtCompileTime + }; + + typedef Matrix WorkspaceType; + + void allocate(const SVDType& svd) { if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols()) { - m_qr.~QRType(); - ::new (&m_qr) QRType(svd.rows(), svd.cols()); + internal::destroy_at(&m_qr); + internal::construct_at(&m_qr, svd.rows(), svd.cols()); } if (svd.m_computeFullU) m_workspace.resize(svd.rows()); else if (svd.m_computeThinU) m_workspace.resize(svd.cols()); } - bool run(JacobiSVD& svd, const MatrixType& matrix) - { + bool run(SVDType& svd, const MatrixType& matrix) { if(matrix.rows() > matrix.cols()) { m_qr.compute(matrix); @@ -282,44 +296,47 @@ public: } return false; } + private: typedef HouseholderQR QRType; QRType m_qr; - typename internal::plain_col_type::type m_workspace; + WorkspaceType m_workspace; }; -template -class qr_preconditioner_impl -{ -public: +template +class qr_preconditioner_impl { + public: typedef typename MatrixType::Scalar Scalar; - enum - { + typedef JacobiSVD SVDType; + + enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Options = MatrixType::Options + MatrixOptions = MatrixType::Options, + WorkspaceSize = internal::traits::MatrixVColsAtCompileTime, + MaxWorkspaceSize = internal::traits::MatrixVMaxColsAtCompileTime }; - typedef typename internal::make_proper_matrix_type< - Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime - >::type TransposeTypeWithSameStorageOrder; + typedef Matrix WorkspaceType; - void allocate(const JacobiSVD& svd) - { + typedef typename internal::make_proper_matrix_type::type + TransposeTypeWithSameStorageOrder; + + void allocate(const SVDType& svd) { if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols()) { - m_qr.~QRType(); - ::new (&m_qr) QRType(svd.cols(), 
svd.rows()); + internal::destroy_at(&m_qr); + internal::construct_at(&m_qr, svd.cols(), svd.rows()); } if (svd.m_computeFullV) m_workspace.resize(svd.cols()); else if (svd.m_computeThinV) m_workspace.resize(svd.rows()); m_adjoint.resize(svd.cols(), svd.rows()); } - bool run(JacobiSVD& svd, const MatrixType& matrix) - { + bool run(SVDType& svd, const MatrixType& matrix) { if(matrix.cols() > matrix.rows()) { m_adjoint = matrix.adjoint(); @@ -342,7 +359,7 @@ private: typedef HouseholderQR QRType; QRType m_qr; TransposeTypeWithSameStorageOrder m_adjoint; - typename internal::plain_row_type::type m_workspace; + WorkspaceType m_workspace; }; /*** 2x2 SVD implementation @@ -350,18 +367,16 @@ private: *** JacobiSVD consists in performing a series of 2x2 SVD subproblems ***/ -template -struct svd_precondition_2x2_block_to_be_real -{ - typedef JacobiSVD SVD; +template +struct svd_precondition_2x2_block_to_be_real { + typedef JacobiSVD SVD; typedef typename MatrixType::RealScalar RealScalar; static bool run(typename SVD::WorkMatrixType&, SVD&, Index, Index, RealScalar&) { return true; } }; -template -struct svd_precondition_2x2_block_to_be_real -{ - typedef JacobiSVD SVD; +template +struct svd_precondition_2x2_block_to_be_real { + typedef JacobiSVD SVD; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; static bool run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q, RealScalar& maxDiagEntry) @@ -375,7 +390,7 @@ struct svd_precondition_2x2_block_to_be_real const RealScalar considerAsZero = (std::numeric_limits::min)(); const RealScalar precision = NumTraits::epsilon(); - if(n==0) + if(numext::is_exactly_zero(n)) { // make sure first column is zero work_matrix.coeffRef(p,p) = work_matrix.coeffRef(q,p) = Scalar(0); @@ -423,249 +438,258 @@ struct svd_precondition_2x2_block_to_be_real } }; -template -struct traits > - : traits<_MatrixType> -{ - typedef _MatrixType MatrixType; +template +struct traits > : svd_traits { + typedef MatrixType_ MatrixType; }; } // end namespace internal /** \ingroup SVD_Module - * - * - * \class JacobiSVD - * - * \brief Two-sided Jacobi SVD decomposition of a rectangular matrix - * - * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition - * \tparam QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally - * for the R-SVD step for non-square matrices. See discussion of possible values below. - * - * SVD decomposition consists in decomposing any n-by-p matrix \a A as a product - * \f[ A = U S V^* \f] - * where \a U is a n-by-n unitary, \a V is a p-by-p unitary, and \a S is a n-by-p real positive matrix which is zero outside of its main diagonal; - * the diagonal entries of S are known as the \em singular \em values of \a A and the columns of \a U and \a V are known as the left - * and right \em singular \em vectors of \a A respectively. - * - * Singular values are always sorted in decreasing order. - * - * This JacobiSVD decomposition computes only the singular values by default. If you want \a U or \a V, you need to ask for them explicitly. - * - * You can ask for only \em thin \a U or \a V to be computed, meaning the following. In case of a rectangular n-by-p matrix, letting \a m be the - * smaller value among \a n and \a p, there are only \a m singular vectors; the remaining columns of \a U and \a V do not correspond to actual - * singular vectors. 
Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix, - * and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving. - * - * Here's an example demonstrating basic usage: - * \include JacobiSVD_basic.cpp - * Output: \verbinclude JacobiSVD_basic.out - * - * This JacobiSVD class is a two-sided Jacobi R-SVD decomposition, ensuring optimal reliability and accuracy. The downside is that it's slower than - * bidiagonalizing SVD algorithms for large square matrices; however its complexity is still \f$ O(n^2p) \f$ where \a n is the smaller dimension and - * \a p is the greater dimension, meaning that it is still of the same order of complexity as the faster bidiagonalizing R-SVD algorithms. - * In particular, like any R-SVD, it takes advantage of non-squareness in that its complexity is only linear in the greater dimension. - * - * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to - * terminate in finite (and reasonable) time. - * - * The possible values for QRPreconditioner are: - * \li ColPivHouseholderQRPreconditioner is the default. In practice it's very safe. It uses column-pivoting QR. - * \li FullPivHouseholderQRPreconditioner, is the safest and slowest. It uses full-pivoting QR. - * Contrary to other QRs, it doesn't allow computing thin unitaries. - * \li HouseholderQRPreconditioner is the fastest, and less safe and accurate than the pivoting variants. It uses non-pivoting QR. - * This is very similar in safety and accuracy to the bidiagonalization process used by bidiagonalizing SVD algorithms (since bidiagonalization - * is inherently non-pivoting). However the resulting SVD is still more reliable than bidiagonalizing SVDs because the Jacobi-based iterarive - * process is more reliable than the optimized bidiagonal SVD iterations. - * \li NoQRPreconditioner allows not to use a QR preconditioner at all. This is useful if you know that you will only be computing - * JacobiSVD decompositions of square matrices. Non-square matrices require a QR preconditioner. Using this option will result in - * faster compilation and smaller executable code. It won't significantly speed up computation, since JacobiSVD is always checking - * if QR preconditioning is needed before applying it anyway. - * - * \sa MatrixBase::jacobiSvd() - */ -template class JacobiSVD - : public SVDBase > -{ - typedef SVDBase Base; - public: + * + * + * \class JacobiSVD + * + * \brief Two-sided Jacobi SVD decomposition of a rectangular matrix + * + * \tparam MatrixType_ the type of the matrix of which we are computing the SVD decomposition + * \tparam Options this optional parameter allows one to specify the type of QR decomposition that will be used + * internally for the R-SVD step for non-square matrices. Additionally, it allows one to specify whether to compute thin + * or full unitaries \a U and \a V. See discussion of possible values below. + * + * SVD decomposition consists in decomposing any n-by-p matrix \a A as a product + * \f[ A = U S V^* \f] + * where \a U is a n-by-n unitary, \a V is a p-by-p unitary, and \a S is a n-by-p real positive matrix which is zero + * outside of its main diagonal; the diagonal entries of S are known as the \em singular \em values of \a A and the + * columns of \a U and \a V are known as the left and right \em singular \em vectors of \a A respectively. 
+ * + * Singular values are always sorted in decreasing order. + * + * This JacobiSVD decomposition computes only the singular values by default. If you want \a U or \a V, you need to ask + * for them explicitly. + * + * You can ask for only \em thin \a U or \a V to be computed, meaning the following. In case of a rectangular n-by-p + * matrix, letting \a m be the smaller value among \a n and \a p, there are only \a m singular vectors; the remaining + * columns of \a U and \a V do not correspond to actual singular vectors. Asking for \em thin \a U or \a V means asking + * for only their \a m first columns to be formed. So \a U is then an n-by-m matrix, and \a V is then a p-by-m matrix. + * Notice that thin \a U and \a V are all you need for (least squares) solving. + * + * Here's an example demonstrating basic usage: + * \include JacobiSVD_basic.cpp + * Output: \verbinclude JacobiSVD_basic.out + * + * This JacobiSVD class is a two-sided Jacobi R-SVD decomposition, ensuring optimal reliability and accuracy. The + * downside is that it's slower than bidiagonalizing SVD algorithms for large square matrices; however its complexity is + * still \f$ O(n^2p) \f$ where \a n is the smaller dimension and \a p is the greater dimension, meaning that it is still + * of the same order of complexity as the faster bidiagonalizing R-SVD algorithms. In particular, like any R-SVD, it + * takes advantage of non-squareness in that its complexity is only linear in the greater dimension. + * + * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is + * guaranteed to terminate in finite (and reasonable) time. + * + * The possible QR preconditioners that can be set with the Options template parameter are: + * \li ColPivHouseholderQRPreconditioner is the default. In practice it's very safe. It uses column-pivoting QR. + * \li FullPivHouseholderQRPreconditioner, is the safest and slowest. It uses full-pivoting QR. + * Contrary to other QRs, it doesn't allow computing thin unitaries. + * \li HouseholderQRPreconditioner is the fastest, and less safe and accurate than the pivoting variants. It uses + * non-pivoting QR. This is very similar in safety and accuracy to the bidiagonalization process used by bidiagonalizing + * SVD algorithms (since bidiagonalization is inherently non-pivoting). However the resulting SVD is still more reliable + * than bidiagonalizing SVDs because the Jacobi-based iterative process is more reliable than the optimized bidiagonal + * SVD iterations. \li NoQRPreconditioner allows not to use a QR preconditioner at all. This is useful if you know that + * you will only be computing JacobiSVD decompositions of square matrices. Non-square matrices require a QR + * preconditioner. Using this option will result in faster compilation and smaller executable code. It won't + * significantly speed up computation, since JacobiSVD is always checking if QR preconditioning is needed before + * applying it anyway. + * + * One may also use the Options template parameter to specify how the unitaries should be computed. The options are + * #ComputeThinU, #ComputeThinV, #ComputeFullU, #ComputeFullV. It is not possible to request both the thin and full + * versions of a unitary. By default, unitaries will not be computed. 
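As a quick illustration of the Options-based interface documented above, here is a minimal usage sketch; the matrix A, the right-hand side b, and their sizes are placeholder data, not part of this patch:

#include <Eigen/SVD>
#include <iostream>

int main() {
  Eigen::MatrixXf A = Eigen::MatrixXf::Random(6, 4);
  // Thin U and thin V are requested at compile time via the Options template parameter.
  Eigen::JacobiSVD<Eigen::MatrixXf, Eigen::ComputeThinU | Eigen::ComputeThinV> svd(A);
  std::cout << svd.singularValues() << "\n";  // always sorted in decreasing order
  // Thin U/V are all that least-squares solving needs, as noted above.
  Eigen::VectorXf b = Eigen::VectorXf::Ones(6);
  Eigen::VectorXf x = svd.solve(b);
  std::cout << x << "\n";
  // Deprecated runtime equivalent, still accepted by this patch:
  // Eigen::JacobiSVD<Eigen::MatrixXf> svd_rt(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
}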
+ * + * You can set the QRPreconditioner and unitary options together: JacobiSVD + * + * \sa MatrixBase::jacobiSvd() + */ +template +class JacobiSVD : public SVDBase > { + typedef SVDBase Base; - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime), - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime,MaxColsAtCompileTime), - MatrixOptions = MatrixType::Options - }; + public: + typedef MatrixType_ MatrixType; + typedef typename Base::Scalar Scalar; + typedef typename Base::RealScalar RealScalar; + typedef typename Base::Index Index; + enum { + Options = Options_, + QRPreconditioner = internal::get_qr_preconditioner(Options), + RowsAtCompileTime = Base::RowsAtCompileTime, + ColsAtCompileTime = Base::ColsAtCompileTime, + DiagSizeAtCompileTime = Base::DiagSizeAtCompileTime, + MaxRowsAtCompileTime = Base::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Base::MaxColsAtCompileTime, + MaxDiagSizeAtCompileTime = Base::MaxDiagSizeAtCompileTime, + MatrixOptions = Base::MatrixOptions + }; - typedef typename Base::MatrixUType MatrixUType; - typedef typename Base::MatrixVType MatrixVType; - typedef typename Base::SingularValuesType SingularValuesType; - - typedef typename internal::plain_row_type::type RowType; - typedef typename internal::plain_col_type::type ColType; - typedef Matrix - WorkMatrixType; + typedef typename Base::MatrixUType MatrixUType; + typedef typename Base::MatrixVType MatrixVType; + typedef typename Base::SingularValuesType SingularValuesType; + typedef Matrix + WorkMatrixType; - /** \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via JacobiSVD::compute(const MatrixType&). - */ - JacobiSVD() - {} + /** \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via JacobiSVD::compute(const MatrixType&). + */ + JacobiSVD() {} + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem size and \a Options template parameter. + * + * \sa JacobiSVD() + */ + JacobiSVD(Index rows, Index cols) { allocate(rows, cols, internal::get_computation_options(Options)); } - /** \brief Default Constructor with memory preallocation - * - * Like the default constructor but with preallocation of the internal data - * according to the specified problem size. - * \sa JacobiSVD() - */ - JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) - { - allocate(rows, cols, computationOptions); - } + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem size. + * + * One \b cannot request unitaries using both the \a Options template parameter + * and the constructor. If possible, prefer using the \a Options template parameter. 
+ * + * \param computationOptions specify whether to compute Thin/Full unitaries U/V + * \sa JacobiSVD() + * + * \deprecated Will be removed in the next major Eigen version. Options should + * be specified in the \a Options template parameter. + */ + EIGEN_DEPRECATED + JacobiSVD(Index rows, Index cols, unsigned int computationOptions) { + internal::check_svd_options_assertions(computationOptions, rows, cols); + allocate(rows, cols, computationOptions); + } - /** \brief Constructor performing the decomposition of given matrix. - * - * \param matrix the matrix to decompose - * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed. - * By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU, - * #ComputeFullV, #ComputeThinV. - * - * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not - * available with the (non-default) FullPivHouseholderQR preconditioner. - */ - explicit JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0) - { - compute(matrix, computationOptions); - } + /** \brief Constructor performing the decomposition of given matrix, using the custom options specified + * with the \a Options template parameter. + * + * \param matrix the matrix to decompose + */ + explicit JacobiSVD(const MatrixType& matrix) { compute_impl(matrix, internal::get_computation_options(Options)); } - /** \brief Method performing the decomposition of given matrix using custom options. - * - * \param matrix the matrix to decompose - * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed. - * By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU, - * #ComputeFullV, #ComputeThinV. - * - * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not - * available with the (non-default) FullPivHouseholderQR preconditioner. - */ - JacobiSVD& compute(const MatrixType& matrix, unsigned int computationOptions); + /** \brief Constructor performing the decomposition of given matrix using specified options + * for computing unitaries. + * + * One \b cannot request unitaries using both the \a Options template parameter + * and the constructor. If possible, prefer using the \a Options template parameter. + * + * \param matrix the matrix to decompose + * \param computationOptions specify whether to compute Thin/Full unitaries U/V + * + * \deprecated Will be removed in the next major Eigen version. Options should + * be specified in the \a Options template parameter. + */ + // EIGEN_DEPRECATED // TODO(cantonios): re-enable after fixing a few 3p libraries that error on deprecation warnings. + JacobiSVD(const MatrixType& matrix, unsigned int computationOptions) { + internal::check_svd_options_assertions(computationOptions, matrix.rows(), matrix.cols()); + compute_impl(matrix, computationOptions); + } - /** \brief Method performing the decomposition of given matrix using current options. - * - * \param matrix the matrix to decompose - * - * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int). 
- */ - JacobiSVD& compute(const MatrixType& matrix) - { - return compute(matrix, m_computationOptions); - } + /** \brief Method performing the decomposition of given matrix. Computes Thin/Full unitaries U/V if specified + * using the \a Options template parameter or the class constructor. + * + * \param matrix the matrix to decompose + */ + JacobiSVD& compute(const MatrixType& matrix) { return compute_impl(matrix, m_computationOptions); } - using Base::computeU; - using Base::computeV; - using Base::rows; - using Base::cols; - using Base::rank; + /** \brief Method performing the decomposition of given matrix, as specified by + * the `computationOptions` parameter. + * + * \param matrix the matrix to decompose + * \param computationOptions specify whether to compute Thin/Full unitaries U/V + * + * \deprecated Will be removed in the next major Eigen version. Options should + * be specified in the \a Options template parameter. + */ + EIGEN_DEPRECATED + JacobiSVD& compute(const MatrixType& matrix, unsigned int computationOptions) { + internal::check_svd_options_assertions(computationOptions, matrix.rows(), matrix.cols()); + return compute_impl(matrix, computationOptions); + } - private: - void allocate(Index rows, Index cols, unsigned int computationOptions); + using Base::computeU; + using Base::computeV; + using Base::rows; + using Base::cols; + using Base::rank; - protected: - using Base::m_matrixU; - using Base::m_matrixV; - using Base::m_singularValues; - using Base::m_info; - using Base::m_isInitialized; - using Base::m_isAllocated; - using Base::m_usePrescribedThreshold; - using Base::m_computeFullU; - using Base::m_computeThinU; - using Base::m_computeFullV; - using Base::m_computeThinV; - using Base::m_computationOptions; - using Base::m_nonzeroSingularValues; - using Base::m_rows; - using Base::m_cols; - using Base::m_diagSize; - using Base::m_prescribedThreshold; - WorkMatrixType m_workMatrix; + private: + void allocate(Index rows, Index cols, unsigned int computationOptions); + JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions); - template - friend struct internal::svd_precondition_2x2_block_to_be_real; - template - friend struct internal::qr_preconditioner_impl; + protected: + using Base::m_cols; + using Base::m_computationOptions; + using Base::m_computeFullU; + using Base::m_computeFullV; + using Base::m_computeThinU; + using Base::m_computeThinV; + using Base::m_diagSize; + using Base::m_info; + using Base::m_isAllocated; + using Base::m_isInitialized; + using Base::m_matrixU; + using Base::m_matrixV; + using Base::m_nonzeroSingularValues; + using Base::m_prescribedThreshold; + using Base::m_rows; + using Base::m_singularValues; + using Base::m_usePrescribedThreshold; + using Base::ShouldComputeThinU; + using Base::ShouldComputeThinV; - internal::qr_preconditioner_impl m_qr_precond_morecols; - internal::qr_preconditioner_impl m_qr_precond_morerows; - MatrixType m_scaledMatrix; + EIGEN_STATIC_ASSERT(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && + !(ShouldComputeThinV && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)), + "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. 
" + "Use the ColPivHouseholderQR preconditioner instead.") + + template + friend struct internal::svd_precondition_2x2_block_to_be_real; + template + friend struct internal::qr_preconditioner_impl; + + internal::qr_preconditioner_impl + m_qr_precond_morecols; + internal::qr_preconditioner_impl + m_qr_precond_morerows; + WorkMatrixType m_workMatrix; + MatrixType m_scaledMatrix; }; -template -void JacobiSVD::allocate(Eigen::Index rows, Eigen::Index cols, unsigned int computationOptions) -{ - eigen_assert(rows >= 0 && cols >= 0); +template +void JacobiSVD::allocate(Index rows, Index cols, unsigned int computationOptions) { + if (Base::allocate(rows, cols, computationOptions)) return; - if (m_isAllocated && - rows == m_rows && - cols == m_cols && - computationOptions == m_computationOptions) - { - return; - } + eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && + !(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && + "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. " + "Use the ColPivHouseholderQR preconditioner instead."); - m_rows = rows; - m_cols = cols; - m_info = Success; - m_isInitialized = false; - m_isAllocated = true; - m_computationOptions = computationOptions; - m_computeFullU = (computationOptions & ComputeFullU) != 0; - m_computeThinU = (computationOptions & ComputeThinU) != 0; - m_computeFullV = (computationOptions & ComputeFullV) != 0; - m_computeThinV = (computationOptions & ComputeThinV) != 0; - eigen_assert(!(m_computeFullU && m_computeThinU) && "JacobiSVD: you can't ask for both full and thin U"); - eigen_assert(!(m_computeFullV && m_computeThinV) && "JacobiSVD: you can't ask for both full and thin V"); - eigen_assert(EIGEN_IMPLIES(m_computeThinU || m_computeThinV, MatrixType::ColsAtCompileTime==Dynamic) && - "JacobiSVD: thin U and V are only available when your matrix has a dynamic number of columns."); - if (QRPreconditioner == FullPivHouseholderQRPreconditioner) - { - eigen_assert(!(m_computeThinU || m_computeThinV) && - "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. " - "Use the ColPivHouseholderQR preconditioner instead."); - } - m_diagSize = (std::min)(m_rows, m_cols); - m_singularValues.resize(m_diagSize); - if(RowsAtCompileTime==Dynamic) - m_matrixU.resize(m_rows, m_computeFullU ? m_rows - : m_computeThinU ? m_diagSize - : 0); - if(ColsAtCompileTime==Dynamic) - m_matrixV.resize(m_cols, m_computeFullV ? m_cols - : m_computeThinV ? m_diagSize - : 0); m_workMatrix.resize(m_diagSize, m_diagSize); - if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols); } -template -JacobiSVD& -JacobiSVD::compute(const MatrixType& matrix, unsigned int computationOptions) -{ +template +JacobiSVD& JacobiSVD::compute_impl(const MatrixType& matrix, + unsigned int computationOptions) { using std::abs; + allocate(matrix.rows(), matrix.cols(), computationOptions); // currently we stop when we reach precision 2*epsilon as the last bit of precision can require an unreasonable number of iterations, @@ -682,7 +706,7 @@ JacobiSVD::compute(const MatrixType& matrix, unsig m_info = InvalidInput; return *this; } - if(scale==RealScalar(0)) scale = RealScalar(1); + if(numext::is_exactly_zero(scale)) scale = RealScalar(1); /*** step 1. 
The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */ @@ -724,8 +748,8 @@ JacobiSVD::compute(const MatrixType& matrix, unsig finished = false; // perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal // the complex to real operation returns true if the updated 2x2 block is not already diagonal - if(internal::svd_precondition_2x2_block_to_be_real::run(m_workMatrix, *this, p, q, maxDiagEntry)) - { + if (internal::svd_precondition_2x2_block_to_be_real::run(m_workMatrix, *this, p, q, + maxDiagEntry)) { JacobiRotation j_left, j_right; internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right); @@ -775,7 +799,7 @@ JacobiSVD::compute(const MatrixType& matrix, unsig { Index pos; RealScalar maxRemainingSingularValue = m_singularValues.tail(m_diagSize-i).maxCoeff(&pos); - if(maxRemainingSingularValue == RealScalar(0)) + if(numext::is_exactly_zero(maxRemainingSingularValue)) { m_nonzeroSingularValues = i; break; @@ -800,13 +824,19 @@ JacobiSVD::compute(const MatrixType& matrix, unsig * * \sa class JacobiSVD */ -template -JacobiSVD::PlainObject> -MatrixBase::jacobiSvd(unsigned int computationOptions) const -{ - return JacobiSVD(*this, computationOptions); +template +template +JacobiSVD::PlainObject, Options> MatrixBase::jacobiSvd() const { + return JacobiSVD(*this); } -} // end namespace Eigen +template +template +JacobiSVD::PlainObject, Options> MatrixBase::jacobiSvd( + unsigned int computationOptions) const { + return JacobiSVD(*this, computationOptions); +} + +} // end namespace Eigen #endif // EIGEN_JACOBISVD_H diff --git a/libs/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/libs/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h index ff0516f..93244cd 100644 --- a/libs/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +++ b/libs/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h @@ -33,14 +33,17 @@ #ifndef EIGEN_JACOBISVD_LAPACKE_H #define EIGEN_JACOBISVD_LAPACKE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_LAPACKE_SVD(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW) \ +#define EIGEN_LAPACKE_SVD(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW, OPTIONS) \ template<> inline \ -JacobiSVD, ColPivHouseholderQRPreconditioner>& \ -JacobiSVD, ColPivHouseholderQRPreconditioner>::compute(const Matrix& matrix, unsigned int computationOptions) \ +JacobiSVD, OPTIONS>& \ +JacobiSVD, OPTIONS>::compute_impl(const Matrix& matrix, \ + unsigned int computationOptions) \ { \ typedef Matrix MatrixType; \ /*typedef MatrixType::Scalar Scalar;*/ \ @@ -69,22 +72,41 @@ JacobiSVD, ColPiv } else { ldvt=1; vt=&dummy; }\ Matrix superb; superb.resize(m_diagSize, 1); \ MatrixType m_temp; m_temp = matrix; \ - LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index(m_rows), internal::convert_index(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ - if (computeV()) m_matrixV = localV.adjoint(); \ + lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index(m_rows), internal::convert_index(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ + /* Check the result of the LAPACK call */ \ + if (info < 0 || !m_singularValues.allFinite()) { \ + m_info = InvalidInput; \ + } else if (info > 0 ) { \ + m_info = NoConvergence; \ + 
} else { \ + m_info = Success; \ + if (computeV()) m_matrixV = localV.adjoint(); \ + } \ /* for(int i=0;i +void check_svd_options_assertions(unsigned int computationOptions, Index rows, Index cols) { + EIGEN_STATIC_ASSERT((Options & ComputationOptionsBits) == 0, + "SVDBase: Cannot request U or V using both static and runtime options, even if they match. " + "Requesting unitaries at runtime is DEPRECATED: " + "Prefer requesting unitaries statically, using the Options template parameter."); + eigen_assert(!(should_svd_compute_thin_u(computationOptions) && cols < rows && MatrixType::RowsAtCompileTime != Dynamic) && + !(should_svd_compute_thin_v(computationOptions) && rows < cols && MatrixType::ColsAtCompileTime != Dynamic) && + "SVDBase: If thin U is requested at runtime, your matrix must have more rows than columns or a dynamic number of rows. " + "Similarly, if thin V is requested at runtime, your matrix must have more columns than rows or a dynamic number of columns."); + (void)computationOptions; + (void)rows; + (void)cols; +} + template struct traits > : traits { @@ -27,6 +60,29 @@ template struct traits > typedef int StorageIndex; enum { Flags = 0 }; }; + +template +struct svd_traits : traits { + static constexpr int Options = Options_; + static constexpr bool ShouldComputeFullU = internal::should_svd_compute_full_u(Options); + static constexpr bool ShouldComputeThinU = internal::should_svd_compute_thin_u(Options); + static constexpr bool ShouldComputeFullV = internal::should_svd_compute_full_v(Options); + static constexpr bool ShouldComputeThinV = internal::should_svd_compute_thin_v(Options); + enum { + DiagSizeAtCompileTime = + internal::min_size_prefer_dynamic(MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime), + MaxDiagSizeAtCompileTime = + internal::min_size_prefer_dynamic(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime), + MatrixUColsAtCompileTime = ShouldComputeThinU ? DiagSizeAtCompileTime + : MatrixType::RowsAtCompileTime, + MatrixVColsAtCompileTime = ShouldComputeThinV ? DiagSizeAtCompileTime + : MatrixType::ColsAtCompileTime, + MatrixUMaxColsAtCompileTime = ShouldComputeThinU ? MaxDiagSizeAtCompileTime + : MatrixType::MaxRowsAtCompileTime, + MatrixVMaxColsAtCompileTime = ShouldComputeThinV ? MaxDiagSizeAtCompileTime + : MatrixType::MaxColsAtCompileTime + }; +}; } /** \ingroup SVD_Module @@ -52,7 +108,7 @@ template struct traits > * singular vectors. Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix, * and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving. * - * The status of the computation can be retrived using the \a info() method. Unless \a info() returns \a Success, the results should be not + * The status of the computation can be retrieved using the \a info() method. Unless \a info() returns \a Success, the results should not be
* * If the input matrix has inf or nan coefficients, the result of the computation is undefined, and \a info() will return \a InvalidInput, but the computation is guaranteed to @@ -72,20 +128,38 @@ public: typedef typename NumTraits::Real RealScalar; typedef typename Eigen::internal::traits::StorageIndex StorageIndex; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + + static constexpr bool ShouldComputeFullU = internal::traits::ShouldComputeFullU; + static constexpr bool ShouldComputeThinU = internal::traits::ShouldComputeThinU; + static constexpr bool ShouldComputeFullV = internal::traits::ShouldComputeFullV; + static constexpr bool ShouldComputeThinV = internal::traits::ShouldComputeThinV; + enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime), + DiagSizeAtCompileTime = internal::min_size_prefer_dynamic(RowsAtCompileTime, ColsAtCompileTime), MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime,MaxColsAtCompileTime), - MatrixOptions = MatrixType::Options + MaxDiagSizeAtCompileTime = internal::min_size_prefer_fixed(MaxRowsAtCompileTime, MaxColsAtCompileTime), + MatrixOptions = MatrixType::Options, + MatrixUColsAtCompileTime = internal::traits::MatrixUColsAtCompileTime, + MatrixVColsAtCompileTime = internal::traits::MatrixVColsAtCompileTime, + MatrixUMaxColsAtCompileTime = internal::traits::MatrixUMaxColsAtCompileTime, + MatrixVMaxColsAtCompileTime = internal::traits::MatrixVMaxColsAtCompileTime }; - typedef Matrix MatrixUType; - typedef Matrix MatrixVType; + EIGEN_STATIC_ASSERT(!(ShouldComputeFullU && ShouldComputeThinU), "SVDBase: Cannot request both full and thin U") + EIGEN_STATIC_ASSERT(!(ShouldComputeFullV && ShouldComputeThinV), "SVDBase: Cannot request both full and thin V") + + typedef + typename internal::make_proper_matrix_type::type MatrixUType; + typedef + typename internal::make_proper_matrix_type::type MatrixVType; + typedef typename internal::plain_diag_type::type SingularValuesType; - + Derived& derived() { return *static_cast(this); } const Derived& derived() const { return *static_cast(this); } @@ -249,10 +323,7 @@ public: protected: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); - } + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) void _check_compute_assertions() const { eigen_assert(m_isInitialized && "SVD is not initialized."); @@ -267,7 +338,7 @@ protected: } // return true if already allocated - bool allocate(Index rows, Index cols, unsigned int computationOptions) ; + bool allocate(Index rows, Index cols, unsigned int computationOptions); MatrixUType m_matrixU; MatrixVType m_matrixV; @@ -285,21 +356,18 @@ protected: * Default constructor of SVDBase */ SVDBase() - : m_info(Success), - m_isInitialized(false), - m_isAllocated(false), - m_usePrescribedThreshold(false), - m_computeFullU(false), - m_computeThinU(false), - m_computeFullV(false), - m_computeThinV(false), - m_computationOptions(0), - m_rows(-1), m_cols(-1), m_diagSize(0) - { - check_template_parameters(); - } - - + : m_info(Success), + m_isInitialized(false), + m_isAllocated(false), + m_usePrescribedThreshold(false), + m_computeFullU(false), + m_computeThinU(false), + m_computeFullV(false), + m_computeThinV(false), + m_computationOptions(0), + m_rows(-1), + m_cols(-1), + 
m_diagSize(0) {} }; #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -333,9 +401,8 @@ void SVDBase::_solve_impl_transposed(const RhsType &rhs, DstType &dst) } #endif -template -bool SVDBase::allocate(Index rows, Index cols, unsigned int computationOptions) -{ +template +bool SVDBase::allocate(Index rows, Index cols, unsigned int computationOptions) { eigen_assert(rows >= 0 && cols >= 0); if (m_isAllocated && @@ -352,14 +419,13 @@ bool SVDBase::allocate(Index rows, Index cols, unsigned int computat m_isInitialized = false; m_isAllocated = true; m_computationOptions = computationOptions; - m_computeFullU = (computationOptions & ComputeFullU) != 0; - m_computeThinU = (computationOptions & ComputeThinU) != 0; - m_computeFullV = (computationOptions & ComputeFullV) != 0; - m_computeThinV = (computationOptions & ComputeThinV) != 0; + m_computeFullU = ShouldComputeFullU || internal::should_svd_compute_full_u(computationOptions); + m_computeThinU = ShouldComputeThinU || internal::should_svd_compute_thin_u(computationOptions); + m_computeFullV = ShouldComputeFullV || internal::should_svd_compute_full_v(computationOptions); + m_computeThinV = ShouldComputeThinV || internal::should_svd_compute_thin_v(computationOptions); + eigen_assert(!(m_computeFullU && m_computeThinU) && "SVDBase: you can't ask for both full and thin U"); eigen_assert(!(m_computeFullV && m_computeThinV) && "SVDBase: you can't ask for both full and thin V"); - eigen_assert(EIGEN_IMPLIES(m_computeThinU || m_computeThinV, MatrixType::ColsAtCompileTime==Dynamic) && - "SVDBase: thin U and V are only available when your matrix has a dynamic number of columns."); m_diagSize = (std::min)(m_rows, m_cols); m_singularValues.resize(m_diagSize); diff --git a/libs/eigen/Eigen/src/SVD/UpperBidiagonalization.h b/libs/eigen/Eigen/src/SVD/UpperBidiagonalization.h index 997defc..e6c9097 100644 --- a/libs/eigen/Eigen/src/SVD/UpperBidiagonalization.h +++ b/libs/eigen/Eigen/src/SVD/UpperBidiagonalization.h @@ -11,17 +11,19 @@ #ifndef EIGEN_BIDIAGONALIZATION_H #define EIGEN_BIDIAGONALIZATION_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { // UpperBidiagonalization will probably be replaced by a Bidiagonalization class, don't want to make it stable API. // At the same time, it's useful to keep for now as it's about the only thing that is testing the BandMatrix class. -template class UpperBidiagonalization +template class UpperBidiagonalization { public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -37,10 +39,10 @@ template class UpperBidiagonalization typedef Matrix SuperDiagVectorType; typedef HouseholderSequence< const MatrixType, - const typename internal::remove_all::ConjugateReturnType>::type + const internal::remove_all_t::ConjugateReturnType> > HouseholderUSequenceType; typedef HouseholderSequence< - const typename internal::remove_all::type, + const internal::remove_all_t, Diagonal, OnTheRight > HouseholderVSequenceType; @@ -51,7 +53,7 @@ template class UpperBidiagonalization * The default constructor is useful in cases in which the user intends to * perform decompositions via Bidiagonalization::compute(const MatrixType&). 
*/ - UpperBidiagonalization() : m_householder(), m_bidiagonal(), m_isInitialized(false) {} + UpperBidiagonalization() : m_householder(), m_bidiagonal(0, 0), m_isInitialized(false) {} explicit UpperBidiagonalization(const MatrixType& matrix) : m_householder(matrix.rows(), matrix.cols()), @@ -60,7 +62,13 @@ template class UpperBidiagonalization { compute(matrix); } - + + UpperBidiagonalization(Index rows, Index cols) + : m_householder(rows, cols), + m_bidiagonal(cols, cols), + m_isInitialized(false) + {} + UpperBidiagonalization& compute(const MatrixType& matrix); UpperBidiagonalization& computeUnblocked(const MatrixType& matrix); @@ -161,13 +169,13 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename NumTraits::Literal Literal; - enum { StorageOrder = traits::Flags & RowMajorBit }; - typedef InnerStride ColInnerStride; - typedef InnerStride RowInnerStride; + static constexpr int StorageOrder = (traits::Flags & RowMajorBit) ? RowMajor : ColMajor; + typedef InnerStride ColInnerStride; + typedef InnerStride RowInnerStride; typedef Ref, 0, ColInnerStride> SubColumnType; typedef Ref, 0, RowInnerStride> SubRowType; typedef Ref > SubMatType; - + Index brows = A.rows(); Index bcols = A.cols(); @@ -293,7 +301,7 @@ void upperbidiagonalization_inplace_blocked(MatrixType& A, BidiagType& bidiagona Index size = (std::min)(rows, cols); // X and Y are work space - enum { StorageOrder = traits::Flags & RowMajorBit }; + static constexpr int StorageOrder = (traits::Flags & RowMajorBit) ? RowMajor : ColMajor; Matrix -UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::computeUnblocked(const _MatrixType& matrix) +template +UpperBidiagonalization& UpperBidiagonalization::computeUnblocked(const MatrixType_& matrix) { Index rows = matrix.rows(); Index cols = matrix.cols(); @@ -377,8 +385,8 @@ UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::comput return *this; } -template -UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::compute(const _MatrixType& matrix) +template +UpperBidiagonalization& UpperBidiagonalization::compute(const MatrixType_& matrix) { Index rows = matrix.rows(); Index cols = matrix.cols(); diff --git a/libs/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h b/libs/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h new file mode 100644 index 0000000..f8d8762 --- /dev/null +++ b/libs/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_SPARSECHOLESKY_MODULE_H +#error "Please include Eigen/SparseCholesky instead of including headers inside the src directory directly." 
+#endif diff --git a/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h b/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h index 9f93e32..d90ca13 100644 --- a/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SIMPLICIAL_CHOLESKY_H #define EIGEN_SIMPLICIAL_CHOLESKY_H +#include "./InternalHeaderCheck.h" + namespace Eigen { enum SimplicialCholeskyMode { @@ -271,17 +273,17 @@ class SimplicialCholeskyBase : public SparseSolverBase RealScalar m_shiftScale; }; -template > class SimplicialLLT; -template > class SimplicialLDLT; -template > class SimplicialCholesky; +template > class SimplicialLLT; +template > class SimplicialLDLT; +template > class SimplicialCholesky; namespace internal { -template struct traits > +template struct traits > { - typedef _MatrixType MatrixType; - typedef _Ordering OrderingType; - enum { UpLo = _UpLo }; + typedef MatrixType_ MatrixType; + typedef Ordering_ OrderingType; + enum { UpLo = UpLo_ }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef SparseMatrix CholMatrixType; @@ -291,11 +293,11 @@ template struct traits struct traits > +template struct traits > { - typedef _MatrixType MatrixType; - typedef _Ordering OrderingType; - enum { UpLo = _UpLo }; + typedef MatrixType_ MatrixType; + typedef Ordering_ OrderingType; + enum { UpLo = UpLo_ }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef SparseMatrix CholMatrixType; @@ -305,11 +307,11 @@ template struct traits struct traits > +template struct traits > { - typedef _MatrixType MatrixType; - typedef _Ordering OrderingType; - enum { UpLo = _UpLo }; + typedef MatrixType_ MatrixType; + typedef Ordering_ OrderingType; + enum { UpLo = UpLo_ }; }; } @@ -325,21 +327,21 @@ template struct traits - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. - * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> + * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> * * \implsparsesolverconcept * * \sa class SimplicialLDLT, class AMDOrdering, class NaturalOrdering */ -template - class SimplicialLLT : public SimplicialCholeskyBase > +template + class SimplicialLLT : public SimplicialCholeskyBase > { public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; + typedef MatrixType_ MatrixType; + enum { UpLo = UpLo_ }; typedef SimplicialCholeskyBase Base; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; @@ -416,21 +418,21 @@ public: * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization * such that the factorized matrix is P A P^-1. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. 
- * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> + * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> * * \implsparsesolverconcept * * \sa class SimplicialLLT, class AMDOrdering, class NaturalOrdering */ -template - class SimplicialLDLT : public SimplicialCholeskyBase > +template + class SimplicialLDLT : public SimplicialCholeskyBase > { public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; + typedef MatrixType_ MatrixType; + enum { UpLo = UpLo_ }; typedef SimplicialCholeskyBase Base; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; @@ -507,12 +509,12 @@ public: * * \sa class SimplicialLDLT, class SimplicialLLT */ -template - class SimplicialCholesky : public SimplicialCholeskyBase > +template + class SimplicialCholesky : public SimplicialCholeskyBase > { public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; + typedef MatrixType_ MatrixType; + enum { UpLo = UpLo_ }; typedef SimplicialCholeskyBase Base; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; diff --git a/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h index 72e1740..3106c9b 100644 --- a/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +++ b/libs/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h @@ -20,6 +20,8 @@ the Mozilla Public License v. 2.0, as stated at the top of this file. #ifndef EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H #define EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template diff --git a/libs/eigen/Eigen/src/SparseCore/AmbiVector.h b/libs/eigen/Eigen/src/SparseCore/AmbiVector.h index 2cb7747..594e91d 100644 --- a/libs/eigen/Eigen/src/SparseCore/AmbiVector.h +++ b/libs/eigen/Eigen/src/SparseCore/AmbiVector.h @@ -10,6 +10,8 @@ #ifndef EIGEN_AMBIVECTOR_H #define EIGEN_AMBIVECTOR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -19,12 +21,12 @@ namespace internal { * * See BasicSparseLLT and SparseProduct for usage examples. */ -template +template class AmbiVector { public: - typedef _Scalar Scalar; - typedef _StorageIndex StorageIndex; + typedef Scalar_ Scalar; + typedef StorageIndex_ StorageIndex; typedef typename NumTraits::Real RealScalar; explicit AmbiVector(Index size) @@ -125,8 +127,8 @@ class AmbiVector }; /** \returns the number of non zeros in the current sub vector */ -template -Index AmbiVector<_Scalar,_StorageIndex>::nonZeros() const +template +Index AmbiVector::nonZeros() const { if (m_mode==IsSparse) return m_llSize; @@ -134,8 +136,8 @@ Index AmbiVector<_Scalar,_StorageIndex>::nonZeros() const return m_end - m_start; } -template -void AmbiVector<_Scalar,_StorageIndex>::init(double estimatedDensity) +template +void AmbiVector::init(double estimatedDensity) { if (estimatedDensity>0.1) init(IsDense); @@ -143,8 +145,8 @@ void AmbiVector<_Scalar,_StorageIndex>::init(double estimatedDensity) init(IsSparse); } -template -void AmbiVector<_Scalar,_StorageIndex>::init(int mode) +template +void AmbiVector::init(int mode) { m_mode = mode; // This is only necessary in sparse mode, but we set these unconditionally to avoid some maybe-uninitialized warnings @@ -160,15 +162,15 @@ void AmbiVector<_Scalar,_StorageIndex>::init(int mode) * * Don't worry, this function is extremely cheap. 
*/ -template -void AmbiVector<_Scalar,_StorageIndex>::restart() +template +void AmbiVector::restart() { m_llCurrent = m_llStart; } /** Set all coefficients of current subvector to zero */ -template -void AmbiVector<_Scalar,_StorageIndex>::setZero() +template +void AmbiVector::setZero() { if (m_mode==IsDense) { @@ -183,8 +185,8 @@ void AmbiVector<_Scalar,_StorageIndex>::setZero() } } -template -_Scalar& AmbiVector<_Scalar,_StorageIndex>::coeffRef(Index i) +template +Scalar_& AmbiVector::coeffRef(Index i) { if (m_mode==IsDense) return m_buffer[i]; @@ -252,8 +254,8 @@ _Scalar& AmbiVector<_Scalar,_StorageIndex>::coeffRef(Index i) } } -template -_Scalar& AmbiVector<_Scalar,_StorageIndex>::coeff(Index i) +template +Scalar_& AmbiVector::coeff(Index i) { if (m_mode==IsDense) return m_buffer[i]; @@ -280,11 +282,11 @@ _Scalar& AmbiVector<_Scalar,_StorageIndex>::coeff(Index i) } /** Iterator over the nonzero coefficients */ -template -class AmbiVector<_Scalar,_StorageIndex>::Iterator +template +class AmbiVector::Iterator { public: - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef typename NumTraits::Real RealScalar; /** Default constructor diff --git a/libs/eigen/Eigen/src/SparseCore/CompressedStorage.h b/libs/eigen/Eigen/src/SparseCore/CompressedStorage.h index acd986f..733b1aa 100644 --- a/libs/eigen/Eigen/src/SparseCore/CompressedStorage.h +++ b/libs/eigen/Eigen/src/SparseCore/CompressedStorage.h @@ -10,6 +10,8 @@ #ifndef EIGEN_COMPRESSED_STORAGE_H #define EIGEN_COMPRESSED_STORAGE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -18,13 +20,13 @@ namespace internal { * Stores a sparse set of values as a list of values and a list of indices. * */ -template +template class CompressedStorage { public: - typedef _Scalar Scalar; - typedef _StorageIndex StorageIndex; + typedef Scalar_ Scalar; + typedef StorageIndex_ StorageIndex; protected: @@ -69,8 +71,8 @@ class CompressedStorage ~CompressedStorage() { - delete[] m_values; - delete[] m_indices; + conditional_aligned_delete_auto(m_values, m_allocatedSize); + conditional_aligned_delete_auto(m_indices, m_allocatedSize); } void reserve(Index size) @@ -178,24 +180,13 @@ class CompressedStorage { if (m_allocatedSize newValues(m_allocatedSize); - internal::scoped_array newIndices(m_allocatedSize); - - // copy first chunk - internal::smart_copy(m_values, m_values +id, newValues.ptr()); - internal::smart_copy(m_indices, m_indices+id, newIndices.ptr()); - - // copy the rest - if(m_size>id) - { - internal::smart_copy(m_values +id, m_values +m_size, newValues.ptr() +id+1); - internal::smart_copy(m_indices+id, m_indices+m_size, newIndices.ptr()+id+1); - } - std::swap(m_values,newValues.ptr()); - std::swap(m_indices,newIndices.ptr()); + Index newAllocatedSize = 2 * (m_size + 1); + m_values = conditional_aligned_realloc_new_auto(m_values, newAllocatedSize, m_allocatedSize); + m_indices = + conditional_aligned_realloc_new_auto(m_indices, newAllocatedSize, m_allocatedSize); + m_allocatedSize = newAllocatedSize; } - else if(m_size>id) + if(m_size>id) { internal::smart_memmove(m_values +id, m_values +m_size, m_values +id+1); internal::smart_memmove(m_indices+id, m_indices+m_size, m_indices+id+1); @@ -223,22 +214,6 @@ class CompressedStorage } } - void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits::dummy_precision()) - { - Index k = 0; - Index n = size(); - for (Index i=0; i newValues(size); - internal::scoped_array newIndices(size); - Index copySize = (std::min)(size, m_size); - if (copySize>0) { - 
internal::smart_copy(m_values, m_values+copySize, newValues.ptr()); - internal::smart_copy(m_indices, m_indices+copySize, newIndices.ptr()); - } - std::swap(m_values,newValues.ptr()); - std::swap(m_indices,newIndices.ptr()); + m_values = conditional_aligned_realloc_new_auto(m_values, size, m_allocatedSize); + m_indices = conditional_aligned_realloc_new_auto(m_indices, size, m_allocatedSize); m_allocatedSize = size; } diff --git a/libs/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/libs/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 9486502..f852493 100644 --- a/libs/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/libs/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -10,6 +10,8 @@ #ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H #define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -17,9 +19,9 @@ namespace internal { template static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) { - typedef typename remove_all::type::Scalar LhsScalar; - typedef typename remove_all::type::Scalar RhsScalar; - typedef typename remove_all::type::Scalar ResScalar; + typedef typename remove_all_t::Scalar LhsScalar; + typedef typename remove_all_t::Scalar RhsScalar; + typedef typename remove_all_t::Scalar ResScalar; // make sure to call innerSize/outerSize since we fake the storage order. Index rows = lhs.innerSize(); @@ -124,6 +126,11 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r namespace internal { + +// Helper template to generate new sparse matrix types +template +using WithStorageOrder = SparseMatrix; + template::Flags&RowMajorBit) ? RowMajor : ColMajor, int RhsStorageOrder = (traits::Flags&RowMajorBit) ? RowMajor : ColMajor, @@ -133,20 +140,20 @@ struct conservative_sparse_sparse_product_selector; template struct conservative_sparse_sparse_product_selector { - typedef typename remove_all::type LhsCleaned; + typedef remove_all_t LhsCleaned; typedef typename LhsCleaned::Scalar Scalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix RowMajorMatrix; - typedef SparseMatrix ColMajorMatrixAux; - typedef typename sparse_eval::type ColMajorMatrix; + using RowMajorMatrix = WithStorageOrder; + using ColMajorMatrixAux = WithStorageOrder; // If the result is tall and thin (in the extreme case a column vector) // then it is faster to sort the coefficients inplace instead of transposing twice. // FIXME, the following heuristic is probably not very good. 
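// In the tall-thin case each output column carries few nonzeros, so inserting them in
// sorted order directly into a column-major result tends to be cheaper than filling an
// unsorted temporary and sorting it via the two storage-order conversions below.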
if(lhs.rows()>rhs.cols()) { + using ColMajorMatrix = typename sparse_eval::type; ColMajorMatrix resCol(lhs.rows(),rhs.cols()); // perform sorted insertion internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol, true); @@ -168,8 +175,8 @@ struct conservative_sparse_sparse_product_selector RowMajorRhs; - typedef SparseMatrix RowMajorRes; + using RowMajorRhs = WithStorageOrder; + using RowMajorRes = WithStorageOrder; RowMajorRhs rhsRow = rhs; RowMajorRes resRow(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); @@ -182,8 +189,8 @@ struct conservative_sparse_sparse_product_selector RowMajorLhs; - typedef SparseMatrix RowMajorRes; + using RowMajorLhs = WithStorageOrder; + using RowMajorRes = WithStorageOrder; RowMajorLhs lhsRow = lhs; RowMajorRes resRow(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); @@ -196,9 +203,9 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); + using RowMajorRes = WithStorageOrder; + RowMajorRes resRow(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); res = resRow; } }; @@ -207,13 +214,13 @@ struct conservative_sparse_sparse_product_selector struct conservative_sparse_sparse_product_selector { - typedef typename traits::type>::Scalar Scalar; + typedef typename traits>::Scalar Scalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix ColMajorMatrix; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol); + using ColMajorRes = WithStorageOrder; + ColMajorRes resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol); res = resCol; } }; @@ -223,8 +230,8 @@ struct conservative_sparse_sparse_product_selector ColMajorLhs; - typedef SparseMatrix ColMajorRes; + using ColMajorLhs = WithStorageOrder; + using ColMajorRes = WithStorageOrder; ColMajorLhs lhsCol = lhs; ColMajorRes resCol(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); @@ -237,8 +244,8 @@ struct conservative_sparse_sparse_product_selector ColMajorRhs; - typedef SparseMatrix ColMajorRes; + using ColMajorRhs = WithStorageOrder; + using ColMajorRes = WithStorageOrder; ColMajorRhs rhsCol = rhs; ColMajorRes resCol(lhs.rows(), rhs.cols()); internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); @@ -251,12 +258,12 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; - typedef SparseMatrix ColMajorMatrix; - RowMajorMatrix resRow(lhs.rows(),rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); + using ColMajorRes = WithStorageOrder; + using RowMajorRes = WithStorageOrder; + RowMajorRes resRow(lhs.rows(),rhs.cols()); + internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); // sort the non zeros: - ColMajorMatrix resCol(resRow); + ColMajorRes resCol(resRow); res = resCol; } }; @@ -269,8 +276,8 @@ namespace internal { template static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef typename remove_all::type::Scalar LhsScalar; - typedef typename remove_all::type::Scalar RhsScalar; + typedef typename remove_all_t::Scalar LhsScalar; + typedef typename remove_all_t::Scalar RhsScalar; Index cols = rhs.outerSize(); 
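// Dense destination: every nonzero rhs(k,j) scales the sparse column lhs.col(k) and is
// accumulated straight into column j of the dense result, so no sparse temporary and no
// index sorting are needed.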
eigen_assert(lhs.outerSize() == rhs.innerSize()); @@ -317,7 +324,7 @@ struct sparse_sparse_to_dense_product_selector ColMajorLhs; + using ColMajorLhs = WithStorageOrder; ColMajorLhs lhsCol(lhs); internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); } @@ -328,7 +335,7 @@ struct sparse_sparse_to_dense_product_selector ColMajorRhs; + using ColMajorRhs = WithStorageOrder; ColMajorRhs rhsCol(rhs); internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); } diff --git a/libs/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h b/libs/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h new file mode 100644 index 0000000..9de5936 --- /dev/null +++ b/libs/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_SPARSECORE_MODULE_H +#error "Please include Eigen/SparseCore instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h b/libs/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h deleted file mode 100644 index 67718c8..0000000 --- a/libs/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +++ /dev/null @@ -1,67 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2014 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MAPPED_SPARSEMATRIX_H -#define EIGEN_MAPPED_SPARSEMATRIX_H - -namespace Eigen { - -/** \deprecated Use Map > - * \class MappedSparseMatrix - * - * \brief Sparse matrix - * - * \param _Scalar the scalar type, i.e. the type of the coefficients - * - * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme. - * - */ -namespace internal { -template -struct traits > : traits > -{}; -} // end namespace internal - -template -class MappedSparseMatrix - : public Map > -{ - typedef Map > Base; - - public: - - typedef typename Base::StorageIndex StorageIndex; - typedef typename Base::Scalar Scalar; - - inline MappedSparseMatrix(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZeroPtr = 0) - : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZeroPtr) - {} - - /** Empty destructor */ - inline ~MappedSparseMatrix() {} -}; - -namespace internal { - -template -struct evaluator > - : evaluator > > -{ - typedef MappedSparseMatrix<_Scalar,_Options,_StorageIndex> XprType; - typedef evaluator > Base; - - evaluator() : Base() {} - explicit evaluator(const XprType &mat) : Base(mat) {} -}; - -} - -} // end namespace Eigen - -#endif // EIGEN_MAPPED_SPARSEMATRIX_H diff --git a/libs/eigen/Eigen/src/SparseCore/SparseAssign.h b/libs/eigen/Eigen/src/SparseCore/SparseAssign.h index 905485c..29f6af4 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseAssign.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseAssign.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEASSIGN_H #define EIGEN_SPARSEASSIGN_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template @@ -78,12 +80,18 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) const bool transpose = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit); const Index outerEvaluationSize = (SrcEvaluatorType::Flags&RowMajorBit) ? 
src.rows() : src.cols(); + + Index reserveSize = 0; + for (Index j = 0; j < outerEvaluationSize; ++j) + for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it) + reserveSize++; + if ((!transpose) && src.isRValue()) { // eval without temporary dst.resize(src.rows(), src.cols()); dst.setZero(); - dst.reserve((std::min)(src.rows()*src.cols(), (std::max)(src.rows(),src.cols())*2)); + dst.reserve(reserveSize); for (Index j=0; j dense1 = dense2; dense1 += sparse; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::enable_if::Shape,DenseShape>::value>::type + std::enable_if_t::Shape,DenseShape>::value> run(DstXprType &dst, const CwiseBinaryOp, const Lhs, const Rhs> &src, const internal::assign_op& /*func*/) { @@ -188,7 +196,7 @@ struct assignment_from_dense_op_sparse // Specialization for dense1 = sparse - dense2; -> dense1 = -dense2; dense1 += sparse; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::enable_if::Shape,DenseShape>::value>::type + std::enable_if_t::Shape,DenseShape>::value> run(DstXprType &dst, const CwiseBinaryOp, const Lhs, const Rhs> &src, const internal::assign_op& /*func*/) { @@ -206,8 +214,8 @@ struct assignment_from_dense_op_sparse template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> \ struct Assignment, const Lhs, const Rhs>, internal::ASSIGN_OP, \ Sparse2Dense, \ - typename internal::enable_if< internal::is_same::Shape,DenseShape>::value \ - || internal::is_same::Shape,DenseShape>::value>::type> \ + std::enable_if_t< internal::is_same::Shape,DenseShape>::value \ + || internal::is_same::Shape,DenseShape>::value>> \ : assignment_from_dense_op_sparse, internal::ASSIGN_OP2 > \ {} diff --git a/libs/eigen/Eigen/src/SparseCore/SparseBlock.h b/libs/eigen/Eigen/src/SparseCore/SparseBlock.h index 5b4f6cc..b3fc859 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseBlock.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseBlock.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_BLOCK_H #define EIGEN_SPARSE_BLOCK_H +#include "./InternalHeaderCheck.h" + namespace Eigen { // Subset of columns or rows @@ -17,7 +19,7 @@ template class BlockImpl : public SparseMatrixBase > { - typedef typename internal::remove_all::type _MatrixTypeNested; + typedef internal::remove_all_t MatrixTypeNested_; typedef Block BlockType; public: enum { IsRowMajor = internal::traits::IsRowMajor }; @@ -96,7 +98,7 @@ template class sparse_matrix_block_impl : public SparseCompressedBase > { - typedef typename internal::remove_all::type _MatrixTypeNested; + typedef internal::remove_all_t MatrixTypeNested_; typedef Block BlockType; typedef SparseCompressedBase > Base; using Base::convert_index; @@ -119,8 +121,8 @@ public: template inline BlockType& operator=(const SparseMatrixBase& other) { - typedef typename internal::remove_all::type _NestedMatrixType; - _NestedMatrixType& matrix = m_matrix; + typedef internal::remove_all_t NestedMatrixType_; + NestedMatrixType_& matrix = m_matrix; // This assignment is slow if this vector set is not empty // and/or it is not at the end of the nonzeros of the underlying matrix. 
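    // Editorial sketch of the cheap vs. slow path described above, assuming a
    // fresh column-major matrix whose column blocks are assigned left to right:
    //
    //   SparseMatrix<double> A(n, 3 * k);
    //   A.middleCols(0, k)     = B0;  // appends at the end of the nonzeros: cheap
    //   A.middleCols(k, k)     = B1;  // still the trailing columns: cheap
    //   A.middleCols(0, k)     = B2;  // rewrites interior nonzeros: slow path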
@@ -283,13 +285,13 @@ public: } // namespace internal -template -class BlockImpl,BlockRows,BlockCols,true,Sparse> - : public internal::sparse_matrix_block_impl,BlockRows,BlockCols> +template +class BlockImpl,BlockRows,BlockCols,true,Sparse> + : public internal::sparse_matrix_block_impl,BlockRows,BlockCols> { public: - typedef _StorageIndex StorageIndex; - typedef SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType; + typedef StorageIndex_ StorageIndex; + typedef SparseMatrix SparseMatrixType; typedef internal::sparse_matrix_block_impl Base; inline BlockImpl(SparseMatrixType& xpr, Index i) : Base(xpr, i) @@ -302,13 +304,13 @@ public: using Base::operator=; }; -template -class BlockImpl,BlockRows,BlockCols,true,Sparse> - : public internal::sparse_matrix_block_impl,BlockRows,BlockCols> +template +class BlockImpl,BlockRows,BlockCols,true,Sparse> + : public internal::sparse_matrix_block_impl,BlockRows,BlockCols> { public: - typedef _StorageIndex StorageIndex; - typedef const SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType; + typedef StorageIndex_ StorageIndex; + typedef const SparseMatrix SparseMatrixType; typedef internal::sparse_matrix_block_impl Base; inline BlockImpl(SparseMatrixType& xpr, Index i) : Base(xpr, i) @@ -340,7 +342,7 @@ public: enum { IsRowMajor = internal::traits::IsRowMajor }; EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) - typedef typename internal::remove_all::type _MatrixTypeNested; + typedef internal::remove_all_t MatrixTypeNested_; /** Column or Row constructor */ @@ -429,17 +431,12 @@ struct unary_evaluator, IteratorBa enum { IsRowMajor = XprType::IsRowMajor, - - OuterVector = (BlockCols==1 && ArgType::IsRowMajor) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (BlockRows==1 && !ArgType::IsRowMajor), - + OuterVector = (BlockCols == 1 && ArgType::IsRowMajor) || (BlockRows == 1 && !ArgType::IsRowMajor), CoeffReadCost = evaluator::CoeffReadCost, Flags = XprType::Flags }; - typedef typename internal::conditional::type InnerIterator; + typedef std::conditional_t InnerIterator; explicit unary_evaluator(const XprType& op) : m_argImpl(op.nestedExpression()), m_block(op) @@ -467,7 +464,7 @@ template class unary_evaluator, IteratorBased>::InnerVectorInnerIterator : public EvalIterator { - // NOTE MSVC fails to compile if we don't explicitely "import" IsRowMajor from unary_evaluator + // NOTE MSVC fails to compile if we don't explicitly "import" IsRowMajor from unary_evaluator // because the base class EvalIterator has a private IsRowMajor enum too. 
(bug #1786) // NOTE We cannot call it IsRowMajor because it would shadow unary_evaluator::IsRowMajor enum { XprIsRowMajor = unary_evaluator::IsRowMajor }; @@ -533,8 +530,8 @@ public: while(++m_outerPos -struct unary_evaluator,BlockRows,BlockCols,true>, IteratorBased> - : evaluator,BlockRows,BlockCols,true> > > +template +struct unary_evaluator,BlockRows,BlockCols,true>, IteratorBased> + : evaluator,BlockRows,BlockCols,true> > > { - typedef Block,BlockRows,BlockCols,true> XprType; + typedef Block,BlockRows,BlockCols,true> XprType; typedef evaluator > Base; explicit unary_evaluator(const XprType &xpr) : Base(xpr) {} }; -template -struct unary_evaluator,BlockRows,BlockCols,true>, IteratorBased> - : evaluator,BlockRows,BlockCols,true> > > +template +struct unary_evaluator,BlockRows,BlockCols,true>, IteratorBased> + : evaluator,BlockRows,BlockCols,true> > > { - typedef Block,BlockRows,BlockCols,true> XprType; + typedef Block,BlockRows,BlockCols,true> XprType; typedef evaluator > Base; explicit unary_evaluator(const XprType &xpr) : Base(xpr) {} }; diff --git a/libs/eigen/Eigen/src/SparseCore/SparseColEtree.h b/libs/eigen/Eigen/src/SparseCore/SparseColEtree.h index ebe02d1..ff32458 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseColEtree.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseColEtree.h @@ -31,6 +31,8 @@ #ifndef SPARSE_COLETREE_H #define SPARSE_COLETREE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseCore/SparseCompressedBase.h b/libs/eigen/Eigen/src/SparseCore/SparseCompressedBase.h index 6a2c7a8..243cd16 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseCompressedBase.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_COMPRESSED_BASE_H #define EIGEN_SPARSE_COMPRESSED_BASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template class SparseCompressedBase; @@ -20,6 +22,9 @@ template struct traits > : traits {}; +template +struct inner_sort_impl; + } // end namespace internal /** \ingroup SparseCore_Module @@ -124,6 +129,40 @@ class SparseCompressedBase * * \sa valuePtr(), isCompressed() */ Map > coeffs() { eigen_assert(isCompressed()); return Array::Map(valuePtr(),nonZeros()); } + + /** sorts the inner vectors in the range [begin,end) with respect to `Comp` + * \sa innerIndicesAreSorted() */ + template > + inline void sortInnerIndices(Index begin, Index end) { + eigen_assert(begin >= 0 && end <= derived().outerSize() && end >= begin); + internal::inner_sort_impl::run(*this, begin, end); + } + + /** \returns the index of the first inner vector in the range [begin,end) that is not sorted with respect to `Comp`, or `end` if the range is fully sorted + * \sa sortInnerIndices() */ + template > + inline Index innerIndicesAreSorted(Index begin, Index end) const { + eigen_assert(begin >= 0 && end <= derived().outerSize() && end >= begin); + return internal::inner_sort_impl::check(*this, begin, end); + } + + /** sorts the inner vectors in the range [0,outerSize) with respect to `Comp` + * \sa innerIndicesAreSorted() */ + template > + inline void sortInnerIndices() { + Index begin = 0; + Index end = derived().outerSize(); + internal::inner_sort_impl::run(*this, begin, end); + } + + /** \returns the index of the first inner vector in the range [0,outerSize) that is not sorted with respect to `Comp`, or `outerSize` if the range is fully sorted + * \sa sortInnerIndices() */ + template> + inline Index innerIndicesAreSorted() const { + Index begin = 0; + Index end = 
derived().outerSize();
+    return internal::inner_sort_impl<Derived, Comp, IsVectorAtCompileTime>::check(*this, begin, end);
+  }
 
   protected:
     /** Default constructor. Do nothing. */
@@ -194,8 +233,7 @@ class SparseCompressedBase::InnerIterator
       }
     }
 
-    explicit InnerIterator(const SparseCompressedBase& mat)
-      : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_id(0), m_end(mat.nonZeros())
+    explicit InnerIterator(const SparseCompressedBase& mat) : InnerIterator(mat, Index(0))
     {
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
     }
@@ -305,6 +343,138 @@ class SparseCompressedBase::ReverseInnerIterator
 
 namespace internal {
 
+
+// modified from https://artificial-mind.net/blog/2020/11/28/std-sort-multiple-ranges
+template <typename Scalar, typename StorageIndex>
+class CompressedStorageIterator;
+
+// wrapper class analogous to std::pair
+// used to define assignment, swap, and comparison operators for CompressedStorageIterator
+template <typename Scalar, typename StorageIndex>
+class StorageRef
+{
+public:
+  using value_type = std::pair<StorageIndex, Scalar>;
+
+  inline StorageRef& operator=(const StorageRef& other) {
+    *m_innerIndexIterator = *other.m_innerIndexIterator;
+    *m_valueIterator = *other.m_valueIterator;
+    return *this;
+  }
+  inline StorageRef& operator=(const value_type& other) {
+    std::tie(*m_innerIndexIterator, *m_valueIterator) = other;
+    return *this;
+  }
+  inline operator value_type() const { return std::make_pair(*m_innerIndexIterator, *m_valueIterator); }
+  inline friend void swap(const StorageRef& a, const StorageRef& b) {
+    std::iter_swap(a.m_innerIndexIterator, b.m_innerIndexIterator);
+    std::iter_swap(a.m_valueIterator, b.m_valueIterator);
+  }
+
+  inline static const StorageIndex& key(const StorageRef& a) { return *a.m_innerIndexIterator; }
+  inline static const StorageIndex& key(const value_type& a) { return a.first; }
+  #define REF_COMP_REF(OP) inline friend bool operator OP(const StorageRef& a, const StorageRef& b) { return key(a) OP key(b); };
+  #define REF_COMP_VAL(OP) inline friend bool operator OP(const StorageRef& a, const value_type& b) { return key(a) OP key(b); };
+  #define VAL_COMP_REF(OP) inline friend bool operator OP(const value_type& a, const StorageRef& b) { return key(a) OP key(b); };
+  #define MAKE_COMPS(OP) REF_COMP_REF(OP) REF_COMP_VAL(OP) VAL_COMP_REF(OP)
+  MAKE_COMPS(<) MAKE_COMPS(>) MAKE_COMPS(<=) MAKE_COMPS(>=) MAKE_COMPS(==) MAKE_COMPS(!=)
+
+protected:
+  StorageIndex* m_innerIndexIterator;
+  Scalar* m_valueIterator;
+private:
+  StorageRef() = delete;
+  // these constructors are only called by the CompressedStorageIterator constructors for convenience only
+  StorageRef(StorageIndex* innerIndexIterator, Scalar* valueIterator) : m_innerIndexIterator(innerIndexIterator), m_valueIterator(valueIterator) {}
+  StorageRef(const StorageRef& other) : m_innerIndexIterator(other.m_innerIndexIterator), m_valueIterator(other.m_valueIterator) {}
+
+  friend class CompressedStorageIterator<Scalar, StorageIndex>;
+};
+
+// STL-compatible iterator class that operates on inner indices and values
+template <typename Scalar, typename StorageIndex>
+class CompressedStorageIterator
+{
+public:
+  using iterator_category = std::random_access_iterator_tag;
+  using reference = StorageRef<Scalar, StorageIndex>;
+  using difference_type = Index;
+  using value_type = typename reference::value_type;
+  using pointer = value_type*;
+
+  CompressedStorageIterator() = delete;
+  CompressedStorageIterator(difference_type index, StorageIndex* innerIndexPtr, Scalar* valuePtr) : m_index(index), m_data(innerIndexPtr, valuePtr) {}
+  CompressedStorageIterator(difference_type index, reference data) : m_index(index), m_data(data) {}
+  CompressedStorageIterator(const CompressedStorageIterator& other) : m_index(other.m_index), m_data(other.m_data) {}
+  inline CompressedStorageIterator& operator=(const CompressedStorageIterator& other) {
+    m_index = other.m_index;
+    m_data = other.m_data;
+    return *this;
+  }
+
+  inline CompressedStorageIterator operator+(difference_type offset) const { return CompressedStorageIterator(m_index + offset, m_data); }
+  inline CompressedStorageIterator operator-(difference_type offset) const { return CompressedStorageIterator(m_index - offset, m_data); }
+  inline difference_type operator-(const CompressedStorageIterator& other) const { return m_index - other.m_index; }
+  inline CompressedStorageIterator& operator++() { ++m_index; return *this; }
+  inline CompressedStorageIterator& operator--() { --m_index; return *this; }
+  inline CompressedStorageIterator& operator+=(difference_type offset) { m_index += offset; return *this; }
+  inline CompressedStorageIterator& operator-=(difference_type offset) { m_index -= offset; return *this; }
+  inline reference operator*() const { return reference(m_data.m_innerIndexIterator + m_index, m_data.m_valueIterator + m_index); }
+
+  #define MAKE_COMP(OP) inline bool operator OP(const CompressedStorageIterator& other) const { return m_index OP other.m_index; }
+  MAKE_COMP(<) MAKE_COMP(>) MAKE_COMP(>=) MAKE_COMP(<=) MAKE_COMP(!=) MAKE_COMP(==)
+
+protected:
+  difference_type m_index;
+  reference m_data;
+};
+
+template <typename Derived, class Comp, bool IsVector>
+struct inner_sort_impl {
+  typedef typename Derived::Scalar Scalar;
+  typedef typename Derived::StorageIndex StorageIndex;
+  static inline void run(SparseCompressedBase<Derived>& obj, Index begin, Index end) {
+    const bool is_compressed = obj.isCompressed();
+    for (Index outer = begin; outer < end; outer++) {
+      Index begin_offset = obj.outerIndexPtr()[outer];
+      Index end_offset = is_compressed ? obj.outerIndexPtr()[outer + 1] : (begin_offset + obj.innerNonZeroPtr()[outer]);
+      CompressedStorageIterator<Scalar, StorageIndex> begin_it(begin_offset, obj.innerIndexPtr(), obj.valuePtr());
+      CompressedStorageIterator<Scalar, StorageIndex> end_it(end_offset, obj.innerIndexPtr(), obj.valuePtr());
+      std::sort(begin_it, end_it, Comp());
+    }
+  }
+  static inline Index check(const SparseCompressedBase<Derived>& obj, Index begin, Index end) {
+    const bool is_compressed = obj.isCompressed();
+    for (Index outer = begin; outer < end; outer++) {
+      Index begin_offset = obj.outerIndexPtr()[outer];
+      Index end_offset = is_compressed ? obj.outerIndexPtr()[outer + 1] : (begin_offset + obj.innerNonZeroPtr()[outer]);
+      const StorageIndex* begin_it = obj.innerIndexPtr() + begin_offset;
+      const StorageIndex* end_it = obj.innerIndexPtr() + end_offset;
+      bool is_sorted = std::is_sorted(begin_it, end_it, Comp());
+      if (!is_sorted) return outer;
+    }
+    return end;
+  }
+};
+template <typename Derived, class Comp>
+struct inner_sort_impl<Derived, Comp, true> {
+  typedef typename Derived::Scalar Scalar;
+  typedef typename Derived::StorageIndex StorageIndex;
+  static inline void run(SparseCompressedBase<Derived>& obj, Index, Index) {
+    Index begin_offset = 0;
+    Index end_offset = obj.nonZeros();
+    CompressedStorageIterator<Scalar, StorageIndex> begin_it(begin_offset, obj.innerIndexPtr(), obj.valuePtr());
+    CompressedStorageIterator<Scalar, StorageIndex> end_it(end_offset, obj.innerIndexPtr(), obj.valuePtr());
+    std::sort(begin_it, end_it, Comp());
+  }
+  static inline Index check(const SparseCompressedBase<Derived>& obj, Index, Index) {
+    Index begin_offset = 0;
+    Index end_offset = obj.nonZeros();
+    const StorageIndex* begin_it = obj.innerIndexPtr() + begin_offset;
+    const StorageIndex* end_it = obj.innerIndexPtr() + end_offset;
+    return std::is_sorted(begin_it, end_it, Comp()) ?
1 : 0; + } +}; + template struct evaluator > : evaluator_base diff --git a/libs/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/libs/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 9b0d3f9..17cdb8e 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_CWISE_BINARY_OP_H #define EIGEN_SPARSE_CWISE_BINARY_OP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { // Here we have to handle 3 cases: @@ -40,14 +42,11 @@ class CwiseBinaryOpImpl typedef CwiseBinaryOp Derived; typedef SparseMatrixBase Base; EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) - CwiseBinaryOpImpl() - { - EIGEN_STATIC_ASSERT(( - (!internal::is_same::StorageKind, - typename internal::traits::StorageKind>::value) - || ((internal::evaluator::Flags&RowMajorBit) == (internal::evaluator::Flags&RowMajorBit))), - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH); - } + EIGEN_STATIC_ASSERT(( + (!internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value) + || ((internal::evaluator::Flags&RowMajorBit) == (internal::evaluator::Flags&RowMajorBit))), + THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH) }; namespace internal { diff --git a/libs/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/libs/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 32dac0f..6f48fa7 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_CWISE_UNARY_OP_H #define EIGEN_SPARSE_CWISE_UNARY_OP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseCore/SparseDenseProduct.h b/libs/eigen/Eigen/src/SparseCore/SparseDenseProduct.h index f005a18..9c0c531 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseDenseProduct.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEDENSEPRODUCT_H #define EIGEN_SPARSEDENSEPRODUCT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -26,9 +28,9 @@ struct sparse_time_dense_product_impl; template struct sparse_time_dense_product_impl { - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; + typedef internal::remove_all_t Lhs; + typedef internal::remove_all_t Rhs; + typedef internal::remove_all_t Res; typedef typename evaluator::InnerIterator LhsInnerIterator; typedef evaluator LhsEval; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) @@ -63,18 +65,26 @@ struct sparse_time_dense_product_impl let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators -// template -// struct ScalarBinaryOpTraits > +// template +// struct ScalarBinaryOpTraits > // { // enum { // Defined = 1 @@ -85,9 +95,9 @@ struct sparse_time_dense_product_impl struct sparse_time_dense_product_impl { - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; + typedef internal::remove_all_t Lhs; + typedef internal::remove_all_t Rhs; + typedef internal::remove_all_t Res; typedef evaluator LhsEval; typedef typename LhsEval::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) @@ -109,9 +119,9 @@ struct 
sparse_time_dense_product_impl struct sparse_time_dense_product_impl { - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; + typedef internal::remove_all_t Lhs; + typedef internal::remove_all_t Rhs; + typedef internal::remove_all_t Res; typedef evaluator LhsEval; typedef typename LhsEval::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) @@ -149,9 +159,9 @@ struct sparse_time_dense_product_impl struct sparse_time_dense_product_impl { - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; + typedef internal::remove_all_t Lhs; + typedef internal::remove_all_t Rhs; + typedef internal::remove_all_t Res; typedef typename evaluator::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { @@ -226,16 +236,16 @@ template struct sparse_dense_outer_product_evaluator { protected: - typedef typename conditional::type Lhs1; - typedef typename conditional::type ActualRhs; + typedef std::conditional_t Lhs1; + typedef std::conditional_t ActualRhs; typedef Product ProdXprType; // if the actual left-hand side is a dense vector, // then build a sparse-view so that we can seamlessly iterate over it. - typedef typename conditional::StorageKind,Sparse>::value, - Lhs1, SparseView >::type ActualLhs; - typedef typename conditional::StorageKind,Sparse>::value, - Lhs1 const&, SparseView >::type LhsArg; + typedef std::conditional_t::StorageKind,Sparse>::value, + Lhs1, SparseView > ActualLhs; + typedef std::conditional_t::StorageKind,Sparse>::value, + Lhs1 const&, SparseView > LhsArg; typedef evaluator LhsEval; typedef evaluator RhsEval; diff --git a/libs/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h b/libs/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h index 941c03b..4dc9502 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_DIAGONAL_PRODUCT_H #define EIGEN_SPARSE_DIAGONAL_PRODUCT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { // The product of a diagonal matrix with a sparse matrix can be easily diff --git a/libs/eigen/Eigen/src/SparseCore/SparseDot.h b/libs/eigen/Eigen/src/SparseCore/SparseDot.h index 38bc4aa..a45ecfa 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseDot.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseDot.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_DOT_H #define EIGEN_SPARSE_DOT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template diff --git a/libs/eigen/Eigen/src/SparseCore/SparseFuzzy.h b/libs/eigen/Eigen/src/SparseCore/SparseFuzzy.h index 7d47eb9..dcfdde9 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseFuzzy.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseFuzzy.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_FUZZY_H #define EIGEN_SPARSE_FUZZY_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template @@ -17,9 +19,9 @@ template bool SparseMatrixBase::isApprox(const SparseMatrixBase& other, const RealScalar &prec) const { const typename internal::nested_eval::type actualA(derived()); - typename internal::conditional::type, - const PlainObject>::type actualB(other.derived()); + const PlainObject> actualB(other.derived()); return (actualA - 
actualB).squaredNorm() <= prec * prec * numext::mini(actualA.squaredNorm(), actualB.squaredNorm()); } diff --git a/libs/eigen/Eigen/src/SparseCore/SparseMap.h b/libs/eigen/Eigen/src/SparseCore/SparseMap.h index f99be33..0ee3813 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseMap.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseMap.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_MAP_H #define EIGEN_SPARSE_MAP_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -58,12 +60,12 @@ class SparseMapBase using Base::operator=; protected: - typedef typename internal::conditional< - bool(internal::is_lvalue::value), - Scalar *, const Scalar *>::type ScalarPointer; - typedef typename internal::conditional< - bool(internal::is_lvalue::value), - StorageIndex *, const StorageIndex *>::type IndexPointer; + typedef std::conditional_t< + bool(internal::is_lvalue::value), + Scalar *, const Scalar *> ScalarPointer; + typedef std::conditional_t< + bool(internal::is_lvalue::value), + StorageIndex *, const StorageIndex *> IndexPointer; Index m_outerSize; Index m_innerSize; @@ -237,6 +239,7 @@ class Map /** Constructs a read-write Map to a sparse matrix of size \a rows x \a cols, containing \a nnz non-zero coefficients, * stored as a sparse format as defined by the pointers \a outerIndexPtr, \a innerIndexPtr, and \a valuePtr. * If the optional parameter \a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed. + * The inner indices must be sorted appropriately. * * This constructor is available only if \c SparseMatrixType is non-const. * diff --git a/libs/eigen/Eigen/src/SparseCore/SparseMatrix.h b/libs/eigen/Eigen/src/SparseCore/SparseMatrix.h index 616b4a0..6806812 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseMatrix.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseMatrix.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEMATRIX_H #define EIGEN_SPARSEMATRIX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup SparseCore_Module @@ -29,10 +31,10 @@ namespace Eigen { * * More details on this storage sceheme are given in the \ref TutorialSparse "manual pages". * - * \tparam _Scalar the scalar type, i.e. the type of the coefficients - * \tparam _Options Union of bit flags controlling the storage scheme. Currently the only possibility + * \tparam Scalar_ the scalar type, i.e. the type of the coefficients + * \tparam Options_ Union of bit flags controlling the storage scheme. Currently the only possibility * is ColMajor or RowMajor. The default is 0 which means column-major. - * \tparam _StorageIndex the type of the indices. It has to be a \b signed type (e.g., short, int, std::ptrdiff_t). Default is \c int. + * \tparam StorageIndex_ the type of the indices. It has to be a \b signed type (e.g., short, int, std::ptrdiff_t). Default is \c int. * * \warning In %Eigen 3.2, the undocumented type \c SparseMatrix::Index was improperly defined as the storage index type (e.g., int), * whereas it is now (starting from %Eigen 3.3) deprecated and always defined as Eigen::Index. 
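// Editorial sketch of the Map constructor documented in the SparseMap.h hunk
// above, assuming external CSC arrays whose inner indices are already sorted
// within each column:
//
//   int    outer[] = {0, 2, 3};        // cols + 1 entries
//   int    inner[] = {0, 2, 1};        // sorted within each column
//   double vals [] = {1.0, 2.0, 3.0};
//   Eigen::Map<Eigen::SparseMatrix<double>> m(3, 2, 3, outer, inner, vals);
//   // m.coeff(0,0) == 1.0, m.coeff(2,0) == 2.0, m.coeff(1,1) == 3.0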
@@ -43,11 +45,11 @@ namespace Eigen { */ namespace internal { -template -struct traits > +template +struct traits > { - typedef _Scalar Scalar; - typedef _StorageIndex StorageIndex; + typedef Scalar_ Scalar; + typedef StorageIndex_ StorageIndex; typedef Sparse StorageKind; typedef MatrixXpr XprKind; enum { @@ -55,21 +57,21 @@ struct traits > ColsAtCompileTime = Dynamic, MaxRowsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic, - Flags = _Options | NestByRefBit | LvalueBit | CompressedAccessBit, + Flags = Options_ | NestByRefBit | LvalueBit | CompressedAccessBit, SupportedAccessPatterns = InnerRandomAccessPattern }; }; -template -struct traits, DiagIndex> > +template +struct traits, DiagIndex> > { - typedef SparseMatrix<_Scalar, _Options, _StorageIndex> MatrixType; + typedef SparseMatrix MatrixType; typedef typename ref_selector::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Dense StorageKind; - typedef _StorageIndex StorageIndex; + typedef StorageIndex_ StorageIndex; typedef MatrixXpr XprKind; enum { @@ -81,9 +83,9 @@ struct traits, DiagIndex }; }; -template -struct traits, DiagIndex> > - : public traits, DiagIndex> > +template +struct traits, DiagIndex> > + : public traits, DiagIndex> > { enum { Flags = 0 @@ -92,13 +94,13 @@ struct traits, Dia } // end namespace internal -template +template class SparseMatrix - : public SparseCompressedBase > + : public SparseCompressedBase > { typedef SparseCompressedBase Base; using Base::convert_index; - friend class SparseVector<_Scalar,0,_StorageIndex>; + friend class SparseVector; template friend struct internal::Assignment; public: @@ -108,7 +110,7 @@ class SparseMatrix using Base::operator+=; using Base::operator-=; - typedef MappedSparseMatrix Map; + typedef Eigen::Map> Map; typedef Diagonal DiagonalReturnType; typedef Diagonal ConstDiagonalReturnType; typedef typename Base::InnerIterator InnerIterator; @@ -118,13 +120,13 @@ class SparseMatrix using Base::IsRowMajor; typedef internal::CompressedStorage Storage; enum { - Options = _Options + Options = Options_ }; typedef typename Base::IndexVector IndexVector; typedef typename Base::ScalarVector ScalarVector; protected: - typedef SparseMatrix TransposedSparseMatrix; + typedef SparseMatrix TransposedSparseMatrix; Index m_outerSize; Index m_innerSize; @@ -253,9 +255,10 @@ class SparseMatrix inline void setZero() { m_data.clear(); - memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(StorageIndex)); - if(m_innerNonZeros) - memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex)); + std::fill_n(m_outerIndex, m_outerSize + 1, StorageIndex(0)); + if(m_innerNonZeros) { + std::fill_n(m_innerNonZeros, m_outerSize, StorageIndex(0)); + } } /** Preallocates \a reserveSize non zeros. 
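// Editorial sketch of the per-inner-vector reserve() overload touched in the
// next hunk, assuming a column-major matrix and a hypothetical estimate of
// six nonzeros per column:
//
//   Eigen::SparseMatrix<double> A(rows, cols);
//   Eigen::VectorXi nnzPerCol = Eigen::VectorXi::Constant(cols, 6);
//   A.reserve(nnzPerCol);              // one reservation per inner vector
//   // ... A.insert(i, j) = v_ij; for each entry ...
//   A.makeCompressed();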
@@ -285,10 +288,7 @@ class SparseMatrix #else template inline void reserve(const SizesType& reserveSizes, const typename SizesType::value_type& enableif = - #if (!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1500) // MSVC 2005 fails to compile with this typename - typename - #endif - SizesType::value_type()) + typename SizesType::value_type()) { EIGEN_UNUSED_VARIABLE(enableif); reserveInnerVectors(reserveSizes); @@ -302,8 +302,7 @@ class SparseMatrix { Index totalReserveSize = 0; // turn the matrix into non-compressed mode - m_innerNonZeros = static_cast(std::malloc(m_outerSize * sizeof(StorageIndex))); - if (!m_innerNonZeros) internal::throw_std_bad_alloc(); + m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); // temporarily use m_innerSizes to hold the new starting points. StorageIndex* newOuterIndex = m_innerNonZeros; @@ -336,8 +335,7 @@ class SparseMatrix } else { - StorageIndex* newOuterIndex = static_cast(std::malloc((m_outerSize+1)*sizeof(StorageIndex))); - if (!newOuterIndex) internal::throw_std_bad_alloc(); + StorageIndex* newOuterIndex = internal::conditional_aligned_new_auto(m_outerSize + 1); StorageIndex count = 0; for(Index j=0; j(newOuterIndex, m_outerSize + 1); } } @@ -488,7 +486,7 @@ class SparseMatrix m_outerIndex[j+1] = m_outerIndex[j] + m_innerNonZeros[j]; oldStart = nextOldStart; } - std::free(m_innerNonZeros); + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); m_innerNonZeros = 0; m_data.resize(m_outerIndex[m_outerSize]); m_data.squeeze(); @@ -499,7 +497,7 @@ class SparseMatrix { if(m_innerNonZeros != 0) return; - m_innerNonZeros = static_cast(std::malloc(m_outerSize * sizeof(StorageIndex))); + m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); for (Index i = 0; i < m_outerSize; i++) { m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; @@ -569,9 +567,8 @@ class SparseMatrix if (m_innerNonZeros) { // Resize m_innerNonZeros - StorageIndex *newInnerNonZeros = static_cast(std::realloc(m_innerNonZeros, (m_outerSize + outerChange) * sizeof(StorageIndex))); - if (!newInnerNonZeros) internal::throw_std_bad_alloc(); - m_innerNonZeros = newInnerNonZeros; + m_innerNonZeros = internal::conditional_aligned_realloc_new_auto( + m_innerNonZeros, m_outerSize + outerChange, m_outerSize); for(Index i=m_outerSize; i(std::malloc((m_outerSize + outerChange) * sizeof(StorageIndex))); - if (!m_innerNonZeros) internal::throw_std_bad_alloc(); + m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize + outerChange); for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++) m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; for(Index i = m_outerSize; i < m_outerSize + outerChange; i++) @@ -604,9 +600,8 @@ class SparseMatrix if (outerChange == 0) return; - StorageIndex *newOuterIndex = static_cast(std::realloc(m_outerIndex, (m_outerSize + outerChange + 1) * sizeof(StorageIndex))); - if (!newOuterIndex) internal::throw_std_bad_alloc(); - m_outerIndex = newOuterIndex; + m_outerIndex = internal::conditional_aligned_realloc_new_auto( + m_outerIndex, m_outerSize + outerChange + 1, m_outerSize + 1); if (outerChange > 0) { StorageIndex lastIdx = m_outerSize == 0 ? 
0 : m_outerIndex[m_outerSize]; @@ -630,18 +625,16 @@ class SparseMatrix m_data.clear(); if (m_outerSize != outerSize || m_outerSize==0) { - std::free(m_outerIndex); - m_outerIndex = static_cast(std::malloc((outerSize + 1) * sizeof(StorageIndex))); - if (!m_outerIndex) internal::throw_std_bad_alloc(); - + m_outerIndex = internal::conditional_aligned_realloc_new_auto(m_outerIndex, outerSize + 1, + m_outerSize + 1); m_outerSize = outerSize; } if(m_innerNonZeros) { - std::free(m_innerNonZeros); + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); m_innerNonZeros = 0; } - memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(StorageIndex)); + std::fill_n(m_outerIndex, m_outerSize + 1, StorageIndex(0)); } /** \internal @@ -664,7 +657,6 @@ class SparseMatrix inline SparseMatrix() : m_outerSize(-1), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) { - check_template_parameters(); resize(0, 0); } @@ -672,7 +664,6 @@ class SparseMatrix inline SparseMatrix(Index rows, Index cols) : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) { - check_template_parameters(); resize(rows, cols); } @@ -683,7 +674,6 @@ class SparseMatrix { EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - check_template_parameters(); const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator::Flags & RowMajorBit); if (needToTranspose) *this = other.derived(); @@ -695,21 +685,24 @@ class SparseMatrix internal::call_assignment_no_alias(*this, other.derived()); } } - + /** Constructs a sparse matrix from the sparse selfadjoint view \a other */ template inline SparseMatrix(const SparseSelfAdjointView& other) : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) { - check_template_parameters(); Base::operator=(other); } + inline SparseMatrix(SparseMatrix&& other) : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) + { + *this = other.derived().markAsRValue(); + } + /** Copy constructor (it performs a deep copy) */ inline SparseMatrix(const SparseMatrix& other) : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) { - check_template_parameters(); *this = other.derived(); } @@ -718,17 +711,15 @@ class SparseMatrix SparseMatrix(const ReturnByValue& other) : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) { - check_template_parameters(); initAssignment(other); other.evalTo(*this); } - + /** \brief Copy constructor with in-place evaluation */ template explicit SparseMatrix(const DiagonalBase& other) : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) { - check_template_parameters(); *this = other.derived(); } @@ -753,9 +744,10 @@ class SparseMatrix Eigen::Map(this->m_data.indexPtr(), rows()).setLinSpaced(0, StorageIndex(rows()-1)); Eigen::Map(this->m_data.valuePtr(), rows()).setOnes(); Eigen::Map(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows())); - std::free(m_innerNonZeros); + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); m_innerNonZeros = 0; } + inline SparseMatrix& operator=(const SparseMatrix& other) { if (other.isRValue()) @@ -781,6 +773,10 @@ class SparseMatrix return *this; } + inline SparseMatrix& operator=(SparseMatrix&& other) { + return *this = other.derived().markAsRValue(); + } + #ifndef EIGEN_PARSED_BY_DOXYGEN template inline SparseMatrix& operator=(const EigenBase& other) @@ -793,6 +789,7 @@ class 
SparseMatrix template EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase& other); +#ifndef EIGEN_NO_IO friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m) { EIGEN_DBG_SPARSE( @@ -837,12 +834,13 @@ class SparseMatrix s << static_cast&>(m); return s; } +#endif /** Destructor */ inline ~SparseMatrix() { - std::free(m_outerIndex); - std::free(m_innerNonZeros); + internal::conditional_aligned_delete_auto(m_outerIndex, m_outerSize + 1); + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); } /** Overloaded for performance */ @@ -860,7 +858,7 @@ protected: resize(other.rows(), other.cols()); if(m_innerNonZeros) { - std::free(m_innerNonZeros); + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); m_innerNonZeros = 0; } } @@ -1012,11 +1010,8 @@ protected: } private: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); - EIGEN_STATIC_ASSERT((Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS); - } + EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE) + EIGEN_STATIC_ASSERT((Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS) struct default_prunning_func { default_prunning_func(const Scalar& ref, const RealScalar& eps) : reference(ref), epsilon(eps) {} @@ -1103,11 +1098,11 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa * an abstract iterator over a complex data-structure that would be expensive to evaluate. The triplets should rather * be explicitly stored into a std::vector for instance. */ -template +template template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); + internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: @@ -1119,17 +1114,17 @@ void SparseMatrix::setFromTriplets(const InputIte * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); * \endcode */ -template +template template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) { - internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); + internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); } /** \internal */ -template +template template -void SparseMatrix::collapseDuplicates(DupFunctor dup_func) +void SparseMatrix::collapseDuplicates(DupFunctor dup_func) { eigen_assert(!isCompressed()); // TODO, in practice we should be able to use m_innerNonZeros for that task @@ -1162,14 +1157,14 @@ void SparseMatrix::collapseDuplicates(DupFunctor m_outerIndex[m_outerSize] = count; // turn the matrix into compressed form - std::free(m_innerNonZeros); + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); m_innerNonZeros = 0; m_data.resize(m_outerIndex[m_outerSize]); } -template +template template -EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::operator=(const SparseMatrixBase& other) +EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::operator=(const SparseMatrixBase& other) { 
EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) @@ -1189,8 +1184,8 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::type >::type OtherCopy; - typedef typename internal::remove_all::type _OtherCopy; - typedef internal::evaluator<_OtherCopy> OtherCopyEval; + typedef internal::remove_all_t OtherCopy_; + typedef internal::evaluator OtherCopyEval; OtherCopy otherCopy(other.derived()); OtherCopyEval otherCopyEval(otherCopy); @@ -1240,8 +1235,8 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix -typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insert(Index row, Index col) +template +typename SparseMatrix::Scalar& SparseMatrix::insert(Index row, Index col) { eigen_assert(row>=0 && row=0 && col::Scalar& SparseMatrix<_Sca m_data.reserve(2*m_innerSize); // turn the matrix into non-compressed mode - m_innerNonZeros = static_cast(std::malloc(m_outerSize * sizeof(StorageIndex))); - if(!m_innerNonZeros) internal::throw_std_bad_alloc(); + m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); - memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex)); + std::fill(m_innerNonZeros, m_innerNonZeros + m_outerSize, StorageIndex(0)); // pack all inner-vectors to the end of the pre-allocated space // and allocate the entire free-space to the first inner-vector @@ -1271,8 +1265,7 @@ typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Sca else { // turn the matrix into non-compressed mode - m_innerNonZeros = static_cast(std::malloc(m_outerSize * sizeof(StorageIndex))); - if(!m_innerNonZeros) internal::throw_std_bad_alloc(); + m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); for(Index j=0; j::Scalar& SparseMatrix<_Sca return insertUncompressed(row,col); } -template -EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insertUncompressed(Index row, Index col) +template +EIGEN_DONT_INLINE typename SparseMatrix::Scalar& SparseMatrix::insertUncompressed(Index row, Index col) { eigen_assert(!isCompressed()); @@ -1392,8 +1385,8 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& return (m_data.value(p) = Scalar(0)); } -template -EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insertCompressed(Index row, Index col) +template +EIGEN_DONT_INLINE typename SparseMatrix::Scalar& SparseMatrix::insertCompressed(Index row, Index col) { eigen_assert(isCompressed()); @@ -1501,18 +1494,138 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& namespace internal { -template -struct evaluator > - : evaluator > > +template +struct evaluator > + : evaluator > > { - typedef evaluator > > Base; - typedef SparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType; + typedef evaluator > > Base; + typedef SparseMatrix SparseMatrixType; evaluator() : Base() {} explicit evaluator(const SparseMatrixType &mat) : Base(mat) {} }; } +// Specialization for SparseMatrix. +// Serializes [rows, cols, isCompressed, outerSize, innerBufferSize, +// innerNonZeros, outerIndices, innerIndices, values]. 
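// Editorial sketch of a round trip through the serializer defined below,
// assuming its size()/serialize()/deserialize() interface:
//
//   Eigen::SparseMatrix<double> A = ...;            // any sparse matrix
//   Eigen::Serializer<Eigen::SparseMatrix<double>> s;
//   std::vector<uint8_t> buf(s.size(A));
//   uint8_t* end = s.serialize(buf.data(), buf.data() + buf.size(), A);
//   Eigen::SparseMatrix<double> B;
//   s.deserialize(buf.data(), end, B);              // B now matches A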
+template +class Serializer, void> { + public: + typedef SparseMatrix SparseMat; + + struct Header { + typename SparseMat::Index rows; + typename SparseMat::Index cols; + bool compressed; + Index outer_size; + Index inner_buffer_size; + }; + + EIGEN_DEVICE_FUNC size_t size(const SparseMat& value) const { + // innerNonZeros. + std::size_t num_storage_indices = value.isCompressed() ? 0 : value.outerSize(); + // Outer indices. + num_storage_indices += value.outerSize() + 1; + // Inner indices. + const StorageIndex inner_buffer_size = value.outerIndexPtr()[value.outerSize()]; + num_storage_indices += inner_buffer_size; + // Values. + std::size_t num_values = inner_buffer_size; + return sizeof(Header) + sizeof(Scalar) * num_values + + sizeof(StorageIndex) * num_storage_indices; + } + + EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end, + const SparseMat& value) { + if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(dest + size(value) > end)) return nullptr; + + const size_t header_bytes = sizeof(Header); + Header header = {value.rows(), value.cols(), value.isCompressed(), + value.outerSize(), value.outerIndexPtr()[value.outerSize()]}; + EIGEN_USING_STD(memcpy) + memcpy(dest, &header, header_bytes); + dest += header_bytes; + + // innerNonZeros. + if (!header.compressed) { + std::size_t data_bytes = sizeof(StorageIndex) * header.outer_size; + memcpy(dest, value.innerNonZeroPtr(), data_bytes); + dest += data_bytes; + } + + // Outer indices. + std::size_t data_bytes = sizeof(StorageIndex) * (header.outer_size + 1); + memcpy(dest, value.outerIndexPtr(), data_bytes); + dest += data_bytes; + + // Inner indices. + data_bytes = sizeof(StorageIndex) * header.inner_buffer_size; + memcpy(dest, value.innerIndexPtr(), data_bytes); + dest += data_bytes; + + // Values. + data_bytes = sizeof(Scalar) * header.inner_buffer_size; + memcpy(dest, value.valuePtr(), data_bytes); + dest += data_bytes; + + return dest; + } + + EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src, + const uint8_t* end, + SparseMat& value) const { + if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(src + sizeof(Header) > end)) return nullptr; + + const size_t header_bytes = sizeof(Header); + Header header; + EIGEN_USING_STD(memcpy) + memcpy(&header, src, header_bytes); + src += header_bytes; + + value.setZero(); + value.resize(header.rows, header.cols); + if (header.compressed) { + value.makeCompressed(); + } else { + value.uncompress(); + } + + // Adjust value ptr size. + value.data().resize(header.inner_buffer_size); + + // Initialize compressed state and inner non-zeros. + if (!header.compressed) { + // Inner non-zero counts. + std::size_t data_bytes = sizeof(StorageIndex) * header.outer_size; + if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr; + memcpy(value.innerNonZeroPtr(), src, data_bytes); + src += data_bytes; + } + + // Outer indices. + std::size_t data_bytes = sizeof(StorageIndex) * (header.outer_size + 1); + if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr; + memcpy(value.outerIndexPtr(), src, data_bytes); + src += data_bytes; + + // Inner indices. + data_bytes = sizeof(StorageIndex) * header.inner_buffer_size; + if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr; + memcpy(value.innerIndexPtr(), src, data_bytes); + src += data_bytes; + + // Values. 
+ data_bytes = sizeof(Scalar) * header.inner_buffer_size; + if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr; + memcpy(value.valuePtr(), src, data_bytes); + src += data_bytes; + return src; + } +}; + } // end namespace Eigen #endif // EIGEN_SPARSEMATRIX_H diff --git a/libs/eigen/Eigen/src/SparseCore/SparseMatrixBase.h b/libs/eigen/Eigen/src/SparseCore/SparseMatrixBase.h index 229449f..dc78c2e 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseMatrixBase.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEMATRIXBASE_H #define EIGEN_SPARSEMATRIXBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup SparseCore_Module @@ -69,8 +71,7 @@ template class SparseMatrixBase * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ - SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, - internal::traits::ColsAtCompileTime>::ret), + SizeAtCompileTime = (internal::size_of_xpr_at_compile_time::ret), /**< This is equal to the number of coefficients, i.e. the number of * rows times the number of columns, or to \a Dynamic if this is not * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ @@ -78,8 +79,7 @@ template class SparseMatrixBase MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, - MaxSizeAtCompileTime = (internal::size_at_compile_time::ret), + MaxSizeAtCompileTime = internal::size_at_compile_time(MaxRowsAtCompileTime, MaxColsAtCompileTime), IsVectorAtCompileTime = RowsAtCompileTime == 1 || ColsAtCompileTime == 1, /**< This is set to true if either the number of rows or the number of @@ -103,17 +103,17 @@ template class SparseMatrixBase : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), #ifndef EIGEN_PARSED_BY_DOXYGEN - _HasDirectAccess = (int(Flags)&DirectAccessBit) ? 1 : 0 // workaround sunCC + HasDirectAccess_ = (int(Flags)&DirectAccessBit) ? 
1 : 0 // workaround sunCC #endif }; /** \internal the return type of MatrixBase::adjoint() */ - typedef typename internal::conditional::IsComplex, + typedef std::conditional_t::IsComplex, CwiseUnaryOp, Eigen::Transpose >, Transpose - >::type AdjointReturnType; + > AdjointReturnType; typedef Transpose TransposeReturnType; - typedef typename internal::add_const >::type ConstTransposeReturnType; + typedef Transpose ConstTransposeReturnType; // FIXME storage order do not match evaluator storage order typedef SparseMatrix PlainObject; @@ -129,7 +129,7 @@ template class SparseMatrixBase /** \internal the return type of coeff() */ - typedef typename internal::conditional<_HasDirectAccess, const Scalar&, Scalar>::type CoeffReturnType; + typedef std::conditional_t CoeffReturnType; /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp,Matrix > ConstantReturnType; @@ -137,8 +137,8 @@ template class SparseMatrixBase /** type of the equivalent dense matrix */ typedef Matrix DenseMatrixType; /** type of the equivalent square matrix */ - typedef Matrix SquareMatrixType; + typedef Matrix SquareMatrixType; inline const Derived& derived() const { return *static_cast(this); } inline Derived& derived() { return *static_cast(this); } @@ -214,11 +214,11 @@ template class SparseMatrixBase inline void assignGeneric(const OtherDerived& other); public: - +#ifndef EIGEN_NO_IO friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m) { typedef typename Derived::Nested Nested; - typedef typename internal::remove_all::type NestedCleaned; + typedef internal::remove_all_t NestedCleaned; if (Flags&RowMajorBit) { @@ -263,6 +263,7 @@ template class SparseMatrixBase } return s; } +#endif template Derived& operator+=(const SparseMatrixBase& other); diff --git a/libs/eigen/Eigen/src/SparseCore/SparsePermutation.h b/libs/eigen/Eigen/src/SparseCore/SparsePermutation.h index ef38357..af9a1fe 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparsePermutation.h +++ b/libs/eigen/Eigen/src/SparseCore/SparsePermutation.h @@ -12,6 +12,8 @@ // This file implements sparse * permutation products +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -20,7 +22,7 @@ template struct permutation_matrix_product { typedef typename nested_eval::type MatrixType; - typedef typename remove_all::type MatrixTypeCleaned; + typedef remove_all_t MatrixTypeCleaned; typedef typename MatrixTypeCleaned::Scalar Scalar; typedef typename MatrixTypeCleaned::StorageIndex StorageIndex; @@ -30,9 +32,9 @@ struct permutation_matrix_product MoveOuter = SrcStorageOrder==RowMajor ? 
Side==OnTheLeft : Side==OnTheRight }; - typedef typename internal::conditional, - SparseMatrix >::type ReturnType; + SparseMatrix > ReturnType; template static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) @@ -107,7 +109,7 @@ struct product_evaluator, ProductTag, Permut explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } diff --git a/libs/eigen/Eigen/src/SparseCore/SparseProduct.h b/libs/eigen/Eigen/src/SparseCore/SparseProduct.h index af8a774..85a8a10 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseProduct.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseProduct.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEPRODUCT_H #define EIGEN_SPARSEPRODUCT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \returns an expression of the product of two sparse matrices. @@ -45,19 +47,19 @@ struct generic_product_impl // dense += sparse * sparse template - static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, typename enable_if::Shape,DenseShape>::value,int*>::type* = 0) + static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, std::enable_if_t::Shape,DenseShape>::value,int*>* = 0) { typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); - internal::sparse_sparse_to_dense_product_selector::type, - typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); + internal::sparse_sparse_to_dense_product_selector, + remove_all_t, Dest>::run(lhsNested,rhsNested,dst); } // dense -= sparse * sparse template - static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, typename enable_if::Shape,DenseShape>::value,int*>::type* = 0) + static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, std::enable_if_t::Shape,DenseShape>::value,int*>* = 0) { addTo(dst, -lhs, rhs); } @@ -72,8 +74,8 @@ protected: typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); - internal::conservative_sparse_sparse_product_selector::type, - typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); + internal::conservative_sparse_sparse_product_selector, + remove_all_t, Dest>::run(lhsNested,rhsNested,dst); } // dense = sparse * sparse @@ -147,14 +149,14 @@ struct unary_evaluator >, IteratorBased> : m_result(xpr.rows(), xpr.cols()) { using std::abs; - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; LhsNested lhsNested(xpr.nestedExpression().lhs()); RhsNested rhsNested(xpr.nestedExpression().rhs()); - internal::sparse_sparse_product_with_pruning_selector::type, - typename remove_all::type, PlainObject>::run(lhsNested,rhsNested,m_result, + internal::sparse_sparse_product_with_pruning_selector, + remove_all_t, PlainObject>::run(lhsNested,rhsNested,m_result, abs(xpr.reference())*xpr.epsilon()); } @@ -165,9 +167,9 @@ protected: } // end namespace internal // sparse matrix = sparse-product (can be sparse*sparse, sparse*perm, etc.) 
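// Editorial sketch: the operator= overload below lets product expressions be
// assigned directly; the result is evaluated into a temporary first, which
// also keeps aliased assignments well defined:
//
//   Eigen::SparseMatrix<double> A, B, C;   // A and B filled elsewhere
//   C = A * B;                             // conservative sparse*sparse product
//   A = A * B;                             // safe: goes through the temporary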
-template +template template -SparseMatrix& SparseMatrix::operator=(const Product& src) +SparseMatrix& SparseMatrix::operator=(const Product& src) { // std::cout << "in Assignment : " << DstOptions << "\n"; SparseMatrix dst(src.rows(),src.cols()); diff --git a/libs/eigen/Eigen/src/SparseCore/SparseRedux.h b/libs/eigen/Eigen/src/SparseCore/SparseRedux.h index 4587749..6b14c58 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseRedux.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseRedux.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEREDUX_H #define EIGEN_SPARSEREDUX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template @@ -25,9 +27,9 @@ SparseMatrixBase::sum() const return res; } -template -typename internal::traits >::Scalar -SparseMatrix<_Scalar,_Options,_Index>::sum() const +template +typename internal::traits >::Scalar +SparseMatrix::sum() const { eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); if(this->isCompressed()) @@ -36,9 +38,9 @@ SparseMatrix<_Scalar,_Options,_Index>::sum() const return Base::sum(); } -template -typename internal::traits >::Scalar -SparseVector<_Scalar,_Options,_Index>::sum() const +template +typename internal::traits >::Scalar +SparseVector::sum() const { eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); return Matrix::Map(m_data.valuePtr(), m_data.size()).sum(); diff --git a/libs/eigen/Eigen/src/SparseCore/SparseRef.h b/libs/eigen/Eigen/src/SparseCore/SparseRef.h index 748f87d..9e69d93 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseRef.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseRef.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_REF_H #define EIGEN_SPARSE_REF_H +#include "./InternalHeaderCheck.h" + namespace Eigen { enum { @@ -20,13 +22,13 @@ namespace internal { template class SparseRefBase; -template -struct traits, _Options, _StrideType> > +template +struct traits, Options_, StrideType_> > : public traits > { typedef SparseMatrix PlainObjectType; enum { - Options = _Options, + Options = Options_, Flags = traits::Flags | CompressedAccessBit | NestByRefBit }; @@ -35,27 +37,27 @@ struct traits, _Options, _Stride StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), MatchAtCompileTime = (Derived::Flags&CompressedAccessBit) && StorageOrderMatch }; - typedef typename internal::conditional::type type; + typedef std::conditional_t type; }; }; -template -struct traits, _Options, _StrideType> > - : public traits, _Options, _StrideType> > +template +struct traits, Options_, StrideType_> > + : public traits, Options_, StrideType_> > { enum { Flags = (traits >::Flags | CompressedAccessBit | NestByRefBit) & ~LvalueBit }; }; -template -struct traits, _Options, _StrideType> > +template +struct traits, Options_, StrideType_> > : public traits > { typedef SparseVector PlainObjectType; enum { - Options = _Options, + Options = Options_, Flags = traits::Flags | CompressedAccessBit | NestByRefBit }; @@ -63,14 +65,14 @@ struct traits, _Options, _Stride enum { MatchAtCompileTime = (Derived::Flags&CompressedAccessBit) && Derived::IsVectorAtCompileTime }; - typedef typename internal::conditional::type type; + typedef std::conditional_t type; }; }; -template -struct traits, _Options, _StrideType> > - : public traits, _Options, _StrideType> > +template +struct traits, Options_, StrideType_> > + : public traits, Options_, StrideType_> > { enum { Flags = (traits >::Flags | CompressedAccessBit | NestByRefBit) & 
~LvalueBit @@ -98,9 +100,9 @@ protected: void construct(Expression& expr) { if(expr.outerIndexPtr()==0) - ::new (static_cast(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); + internal::construct_at(this, expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); else - ::new (static_cast(this)) Base(expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr()); + internal::construct_at(this, expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr()); } }; @@ -133,7 +135,7 @@ class Ref template inline Ref(const SparseMatrix& expr); template - inline Ref(const MappedSparseMatrix& expr); + inline Ref(const Map>& expr); public: typedef internal::SparseRefBase Base; @@ -148,15 +150,15 @@ class Ref eigen_assert( ((Options & int(StandardCompressedFormat))==0) || (expr.isCompressed()) ); Base::construct(expr.derived()); } - + template - inline Ref(MappedSparseMatrix& expr) + inline Ref(Map >& expr) { EIGEN_STATIC_ASSERT(bool(Traits::template match >::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); eigen_assert( ((Options & int(StandardCompressedFormat))==0) || (expr.isCompressed()) ); Base::construct(expr.derived()); } - + template inline Ref(const SparseCompressedBase& expr) #else @@ -201,8 +203,7 @@ class Ref, Options, StrideType ~Ref() { if(m_hasCopy) { - TPlainObjectType* obj = reinterpret_cast(&m_storage); - obj->~TPlainObjectType(); + internal::destroy_at(reinterpret_cast(&m_storage)); } } @@ -213,8 +214,7 @@ class Ref, Options, StrideType { if((Options & int(StandardCompressedFormat)) && (!expr.isCompressed())) { - TPlainObjectType* obj = reinterpret_cast(&m_storage); - ::new (obj) TPlainObjectType(expr); + TPlainObjectType* obj = internal::construct_at(reinterpret_cast(&m_storage), expr); m_hasCopy = true; Base::construct(*obj); } @@ -227,8 +227,7 @@ class Ref, Options, StrideType template void construct(const Expression& expr, internal::false_type) { - TPlainObjectType* obj = reinterpret_cast(&m_storage); - ::new (obj) TPlainObjectType(expr); + TPlainObjectType* obj = internal::construct_at(reinterpret_cast(&m_storage), expr); m_hasCopy = true; Base::construct(*obj); } @@ -319,8 +318,7 @@ class Ref, Options, StrideType ~Ref() { if(m_hasCopy) { - TPlainObjectType* obj = reinterpret_cast(&m_storage); - obj->~TPlainObjectType(); + internal::destroy_at(reinterpret_cast(&m_storage)); } } @@ -335,8 +333,7 @@ class Ref, Options, StrideType template void construct(const Expression& expr, internal::false_type) { - TPlainObjectType* obj = reinterpret_cast(&m_storage); - ::new (obj) TPlainObjectType(expr); + TPlainObjectType* obj = internal::construct_at(reinterpret_cast(&m_storage), expr); m_hasCopy = true; Base::construct(*obj); } @@ -355,7 +352,7 @@ struct evaluator, Options, Strid : evaluator, Options, StrideType> > > { typedef evaluator, Options, StrideType> > > Base; - typedef Ref, Options, StrideType> XprType; + typedef Ref, Options, StrideType> XprType; evaluator() : Base() {} explicit evaluator(const XprType &mat) : Base(mat) {} }; @@ -365,7 +362,7 @@ struct evaluator, Options, : evaluator, Options, StrideType> > > { typedef evaluator, Options, StrideType> > > Base; - typedef Ref, Options, StrideType> XprType; + typedef Ref, Options, StrideType> XprType; evaluator() : Base() {} explicit evaluator(const XprType &mat) : Base(mat) {} }; diff --git a/libs/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h 
b/libs/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h index 85b00e1..211506e 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSE_SELFADJOINTVIEW_H #define EIGEN_SPARSE_SELFADJOINTVIEW_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup SparseCore_Module @@ -40,13 +42,13 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix class SparseSelfAdjointView - : public EigenBase > +template class SparseSelfAdjointView + : public EigenBase > { public: enum { - Mode = _Mode, + Mode = Mode_, TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), RowsAtCompileTime = internal::traits::RowsAtCompileTime, ColsAtCompileTime = internal::traits::ColsAtCompileTime @@ -57,7 +59,7 @@ template class SparseSelfAdjointView typedef typename MatrixType::StorageIndex StorageIndex; typedef Matrix VectorI; typedef typename internal::ref_selector::non_const_type MatrixTypeNested; - typedef typename internal::remove_all::type _MatrixTypeNested; + typedef internal::remove_all_t MatrixTypeNested_; explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix) { @@ -68,8 +70,8 @@ template class SparseSelfAdjointView inline Index cols() const { return m_matrix.cols(); } /** \internal \returns a reference to the nested matrix */ - const _MatrixTypeNested& matrix() const { return m_matrix; } - typename internal::remove_reference::type& matrix() { return m_matrix; } + const MatrixTypeNested_& matrix() const { return m_matrix; } + std::remove_reference_t& matrix() { return m_matrix; } /** \returns an expression of the matrix product between a sparse self-adjoint matrix \c *this and a sparse matrix \a rhs. 
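// ---- editor's note (not part of the patch) ----------------------------------
// The renames in this hunk (_Mode -> Mode_, _MatrixTypeNested -> MatrixTypeNested_)
// follow the patch-wide convention of moving the leading underscore to the end.
// Identifiers beginning with an underscore followed by an uppercase letter are
// reserved for the implementation in C++ ([lex.name]), so the old spellings were
// formally off-limits; trailing underscores are not. Illustrative shape only:
template <typename Scalar_, int Mode_>  // OK: ordinary user-land identifiers
struct ExampleView {
  typedef Scalar_ Scalar;               // public alias keeps the familiar name
  static const int Mode = Mode_;
};
// template <typename _Scalar, int _Mode>  // reserved spellings, now avoided
// ------------------------------------------------------------------------------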
* @@ -124,9 +126,9 @@ template class SparseSelfAdjointView /** \returns an expression of P H P^-1 */ // TODO implement twists in a more evaluator friendly fashion - SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix& perm) const + SparseSymmetricPermutationProduct twistedBy(const PermutationMatrix& perm) const { - return SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode>(m_matrix, perm); + return SparseSymmetricPermutationProduct(m_matrix, perm); } template @@ -260,15 +262,6 @@ struct Assignment run(tmp, src, AssignOpType()); dst -= tmp; } - - template - static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const AssignOpType&/*func*/) - { - // TODO directly evaluate into dst; - SparseMatrix tmp(dst.rows(),dst.cols()); - internal::permute_symm_to_fullsymm(src.matrix(), tmp); - dst = tmp; - } }; } // end namespace internal @@ -285,7 +278,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons EIGEN_ONLY_USED_FOR_DEBUG(alpha); typedef typename internal::nested_eval::type SparseLhsTypeNested; - typedef typename internal::remove_all::type SparseLhsTypeNestedCleaned; + typedef internal::remove_all_t SparseLhsTypeNestedCleaned; typedef evaluator LhsEval; typedef typename LhsEval::InnerIterator LhsIterator; typedef typename SparseLhsType::Scalar LhsScalar; @@ -347,7 +340,7 @@ struct generic_product_impl static void scaleAndAddTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs, const typename Dest::Scalar& alpha) { - typedef typename LhsView::_MatrixTypeNested Lhs; + typedef typename LhsView::MatrixTypeNested_ Lhs; typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhsView.matrix()); @@ -364,7 +357,7 @@ struct generic_product_impl static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView, const typename Dest::Scalar& alpha) { - typedef typename RhsView::_MatrixTypeNested Rhs; + typedef typename RhsView::MatrixTypeNested_ Rhs; typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); @@ -390,7 +383,7 @@ struct product_evaluator, ProductTag, Spar product_evaluator(const XprType& xpr) : m_lhs(xpr.lhs()), m_result(xpr.rows(), xpr.cols()) { - ::new (static_cast(this)) Base(m_result); + internal::construct_at(this, m_result); generic_product_impl::evalTo(m_result, m_lhs, xpr.rhs()); } @@ -516,7 +509,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix +template void permute_symm_to_symm(const MatrixType& mat, SparseMatrix& _dest, const typename MatrixType::StorageIndex* perm) { typedef typename MatrixType::StorageIndex StorageIndex; @@ -529,8 +522,8 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix VectorI; typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm) : m_matrix(mat), m_perm(perm) diff --git a/libs/eigen/Eigen/src/SparseCore/SparseSolverBase.h b/libs/eigen/Eigen/src/SparseCore/SparseSolverBase.h index b4c9a42..8261fb5 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseSolverBase.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseSolverBase.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSESOLVERBASE_H #define EIGEN_SPARSESOLVERBASE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -19,7 +21,7 @@ namespace internal { * The rhs is 
decomposed into small vertical panels which are solved through dense temporaries. */ template -typename enable_if::type +std::enable_if_t solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest) { EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); @@ -43,7 +45,7 @@ solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest // Overload for vector as rhs template -typename enable_if::type +std::enable_if_t solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest) { typedef typename Dest::Scalar DestScalar; @@ -73,6 +75,8 @@ class SparseSolverBase : internal::noncopyable : m_isInitialized(false) {} + SparseSolverBase(SparseSolverBase&&other ) : internal::noncopyable{}, m_isInitialized{other.m_isInitialized} {} + ~SparseSolverBase() {} diff --git a/libs/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/libs/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h index 88820a4..ee0ec1b 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H #define EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -21,9 +23,9 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r { // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res); - typedef typename remove_all::type::Scalar RhsScalar; - typedef typename remove_all::type::Scalar ResScalar; - typedef typename remove_all::type::StorageIndex StorageIndex; + typedef typename remove_all_t::Scalar RhsScalar; + typedef typename remove_all_t::Scalar ResScalar; + typedef typename remove_all_t::StorageIndex StorageIndex; // make sure to call innerSize/outerSize since we fake the storage order. 
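// ---- editor's sketch (not part of the patch) --------------------------------
// The two solve_sparse_through_dense_panels overloads above are paired through
// std::enable_if_t so that exactly one survives template substitution for a given
// right-hand side (a multi-column block vs. a single vector). The stripped
// template arguments make the exact predicate unrecoverable here, so this reduced
// sketch assumes it keys on ColsAtCompileTime (C++14):
#include <type_traits>

template <class Dest>
std::enable_if_t<Dest::ColsAtCompileTime != 1>  // enabled for block right-hand sides
solve_dispatch(Dest&) { /* solve panel by panel through dense temporaries */ }

template <class Dest>
std::enable_if_t<Dest::ColsAtCompileTime == 1>  // enabled for a single column
solve_dispatch(Dest&) { /* one dense temporary suffices */ }

struct OneCol  { static const int ColsAtCompileTime = 1; };
struct ManyCol { static const int ColsAtCompileTime = 4; };
// solve_dispatch picks the vector overload for OneCol and the panel one for ManyCol.
// ------------------------------------------------------------------------------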
Index rows = lhs.innerSize(); @@ -90,7 +92,7 @@ struct sparse_sparse_product_with_pruning_selector::type _res(res.rows(), res.cols()); + remove_all_t _res(res.rows(), res.cols()); internal::sparse_sparse_product_with_pruning_impl(lhs, rhs, _res, tolerance); res.swap(_res); } @@ -117,7 +119,7 @@ struct sparse_sparse_product_with_pruning_selector::type _res(res.rows(), res.cols()); + remove_all_t _res(res.rows(), res.cols()); internal::sparse_sparse_product_with_pruning_impl(rhs, lhs, _res, tolerance); res.swap(_res); } diff --git a/libs/eigen/Eigen/src/SparseCore/SparseTranspose.h b/libs/eigen/Eigen/src/SparseCore/SparseTranspose.h index 3757d4c..cce5903 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseTranspose.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseTranspose.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSETRANSPOSE_H #define EIGEN_SPARSETRANSPOSE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseCore/SparseTriangularView.h b/libs/eigen/Eigen/src/SparseCore/SparseTriangularView.h index 9ac1202..5e7cea7 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseTriangularView.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseTriangularView.h @@ -11,6 +11,8 @@ #ifndef EIGEN_SPARSE_TRIANGULARVIEW_H #define EIGEN_SPARSE_TRIANGULARVIEW_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup SparseCore_Module @@ -44,8 +46,8 @@ template class TriangularViewImpl::type MatrixTypeNestedNonRef; - typedef typename internal::remove_all::type MatrixTypeNestedCleaned; + typedef std::remove_reference_t MatrixTypeNestedNonRef; + typedef internal::remove_all_t MatrixTypeNestedCleaned; template EIGEN_DEVICE_FUNC diff --git a/libs/eigen/Eigen/src/SparseCore/SparseUtil.h b/libs/eigen/Eigen/src/SparseCore/SparseUtil.h index ceb9368..47f5ef6 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseUtil.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseUtil.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEUTIL_H #define EIGEN_SPARSEUTIL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { #ifdef NDEBUG @@ -49,10 +51,8 @@ const int InnerRandomAccessPattern = 0x2 | CoherentAccessPattern; const int OuterRandomAccessPattern = 0x4 | CoherentAccessPattern; const int RandomAccessPattern = 0x8 | OuterRandomAccessPattern | InnerRandomAccessPattern; -template class SparseMatrix; -template class DynamicSparseMatrix; -template class SparseVector; -template class MappedSparseMatrix; +template class SparseMatrix; +template class SparseVector; template class SparseSelfAdjointView; template class SparseDiagonalProduct; @@ -65,10 +65,10 @@ template class SparseDenseOuterProdu template struct SparseSparseProductReturnType; template::ColsAtCompileTime,internal::traits::RowsAtCompileTime)> struct DenseSparseProductReturnType; + int InnerSize = internal::min_size_prefer_fixed(internal::traits::ColsAtCompileTime, internal::traits::RowsAtCompileTime)> struct DenseSparseProductReturnType; template::ColsAtCompileTime,internal::traits::RowsAtCompileTime)> struct SparseDenseProductReturnType; + int InnerSize = internal::min_size_prefer_fixed(internal::traits::ColsAtCompileTime, internal::traits::RowsAtCompileTime)> struct SparseDenseProductReturnType; template class SparseSymmetricPermutationProduct; namespace internal { @@ -80,41 +80,41 @@ template struct eval {}; template struct sparse_eval { - typedef typename traits::Scalar _Scalar; - typedef typename traits::StorageIndex _StorageIndex; + typedef typename traits::Scalar Scalar_; + typedef typename traits::StorageIndex 
StorageIndex_; public: - typedef SparseVector<_Scalar, RowMajor, _StorageIndex> type; + typedef SparseVector type; }; template struct sparse_eval { - typedef typename traits::Scalar _Scalar; - typedef typename traits::StorageIndex _StorageIndex; + typedef typename traits::Scalar Scalar_; + typedef typename traits::StorageIndex StorageIndex_; public: - typedef SparseVector<_Scalar, ColMajor, _StorageIndex> type; + typedef SparseVector type; }; // TODO this seems almost identical to plain_matrix_type template struct sparse_eval { - typedef typename traits::Scalar _Scalar; - typedef typename traits::StorageIndex _StorageIndex; - enum { _Options = ((Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; + typedef typename traits::Scalar Scalar_; + typedef typename traits::StorageIndex StorageIndex_; + enum { Options_ = ((Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; public: - typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type; + typedef SparseMatrix type; }; template struct sparse_eval { - typedef typename traits::Scalar _Scalar; + typedef typename traits::Scalar Scalar_; public: - typedef Matrix<_Scalar, 1, 1> type; + typedef Matrix type; }; template struct plain_matrix_type { - typedef typename traits::Scalar _Scalar; - typedef typename traits::StorageIndex _StorageIndex; - enum { _Options = ((evaluator::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; + typedef typename traits::Scalar Scalar_; + typedef typename traits::StorageIndex StorageIndex_; + enum { Options_ = ((evaluator::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; public: - typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type; + typedef SparseMatrix type; }; template diff --git a/libs/eigen/Eigen/src/SparseCore/SparseVector.h b/libs/eigen/Eigen/src/SparseCore/SparseVector.h index 05779be..3b4d7b0 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseVector.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseVector.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSEVECTOR_H #define EIGEN_SPARSEVECTOR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { /** \ingroup SparseCore_Module @@ -17,7 +19,7 @@ namespace Eigen { * * \brief a sparse vector class * - * \tparam _Scalar the scalar type, i.e. the type of the coefficients + * \tparam Scalar_ the scalar type, i.e. the type of the coefficients * * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme. * @@ -26,21 +28,21 @@ namespace Eigen { */ namespace internal { -template -struct traits > +template +struct traits > { - typedef _Scalar Scalar; - typedef _StorageIndex StorageIndex; + typedef Scalar_ Scalar; + typedef StorageIndex_ StorageIndex; typedef Sparse StorageKind; typedef MatrixXpr XprKind; enum { - IsColVector = (_Options & RowMajorBit) ? 0 : 1, + IsColVector = (Options_ & RowMajorBit) ? 0 : 1, RowsAtCompileTime = IsColVector ? Dynamic : 1, ColsAtCompileTime = IsColVector ? 1 : Dynamic, MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, - Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit) | CompressedAccessBit, + Flags = Options_ | NestByRefBit | LvalueBit | (IsColVector ? 
0 : RowMajorBit) | CompressedAccessBit, SupportedAccessPatterns = InnerRandomAccessPattern }; }; @@ -60,9 +62,9 @@ struct sparse_vector_assign_selector; } -template +template class SparseVector - : public SparseCompressedBase > + : public SparseCompressedBase > { typedef SparseCompressedBase Base; using Base::convert_index; @@ -75,7 +77,7 @@ class SparseVector enum { IsColVector = internal::traits::IsColVector }; enum { - Options = _Options + Options = Options_ }; EIGEN_STRONG_INLINE Index rows() const { return IsColVector ? m_size : 1; } @@ -207,9 +209,33 @@ class SparseVector inline void finalize() {} /** \copydoc SparseMatrix::prune(const Scalar&,const RealScalar&) */ - void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits::dummy_precision()) + Index prune(const Scalar& reference, const RealScalar& epsilon = NumTraits::dummy_precision()) { + return prune([&](const Scalar& val){ return !internal::isMuchSmallerThan(val, reference, epsilon); }); + } + + /** + * \brief Prunes the entries of the vector based on a `predicate` + * \tparam F Type of the predicate. + * \param keep_predicate The predicate that is used to test whether a value should be kept. A callable that + * gets passed a `Scalar` value and returns a boolean. If the predicate returns true, the value is kept. + * \return The new number of structural non-zeros. + */ + template + Index prune(F&& keep_predicate) { - m_data.prune(reference,epsilon); + Index k = 0; + Index n = m_data.size(); + for (Index i = 0; i < n; ++i) + { + if (keep_predicate(m_data.value(i))) + { + m_data.value(k) = std::move(m_data.value(i)); + m_data.index(k) = m_data.index(i); + ++k; + } + } + m_data.resize(k); + return k; } /** Resizes the sparse vector to \a rows x \a cols @@ -256,11 +282,11 @@ class SparseVector void resizeNonZeros(Index size) { m_data.resize(size); } - inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); } + inline SparseVector() : m_size(0) { resize(0); } - explicit inline SparseVector(Index size) : m_size(0) { check_template_parameters(); resize(size); } + explicit inline SparseVector(Index size) : m_size(0) { resize(size); } - inline SparseVector(Index rows, Index cols) : m_size(0) { check_template_parameters(); resize(rows,cols); } + inline SparseVector(Index rows, Index cols) : m_size(0) { resize(rows,cols); } template inline SparseVector(const SparseMatrixBase& other) @@ -269,14 +295,12 @@ class SparseVector #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN #endif - check_template_parameters(); *this = other.derived(); } inline SparseVector(const SparseVector& other) : Base(other), m_size(0) { - check_template_parameters(); *this = other.derived(); } @@ -329,6 +353,7 @@ class SparseVector } #endif +#ifndef EIGEN_NO_IO friend std::ostream & operator << (std::ostream & s, const SparseVector& m) { for (Index i=0; i::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); - EIGEN_STATIC_ASSERT((_Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS); - } - + EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE) + EIGEN_STATIC_ASSERT((Options_&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS) + Storage m_data; Index m_size; }; namespace internal { -template -struct evaluator > - : evaluator_base > +template +struct evaluator > + : evaluator_base > { - typedef SparseVector<_Scalar,_Options,_Index> SparseVectorType; + typedef SparseVector SparseVectorType; typedef evaluator_base Base; typedef typename
SparseVectorType::InnerIterator InnerIterator; typedef typename SparseVectorType::ReverseInnerIterator ReverseInnerIterator; enum { - CoeffReadCost = NumTraits<_Scalar>::ReadCost, + CoeffReadCost = NumTraits::ReadCost, Flags = SparseVectorType::Flags }; @@ -473,6 +495,78 @@ struct sparse_vector_assign_selector { } +// Specialization for SparseVector. +// Serializes [size, numNonZeros, innerIndices, values]. +template +class Serializer, void> { + public: + typedef SparseVector SparseMat; + + struct Header { + typename SparseMat::Index size; + Index num_non_zeros; + }; + + EIGEN_DEVICE_FUNC size_t size(const SparseMat& value) const { + return sizeof(Header) + + (sizeof(Scalar) + sizeof(StorageIndex)) * value.nonZeros(); + } + + EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end, + const SparseMat& value) { + if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(dest + size(value) > end)) return nullptr; + + const size_t header_bytes = sizeof(Header); + Header header = {value.innerSize(), value.nonZeros()}; + EIGEN_USING_STD(memcpy) + memcpy(dest, &header, header_bytes); + dest += header_bytes; + + // Inner indices. + std::size_t data_bytes = sizeof(StorageIndex) * header.num_non_zeros; + memcpy(dest, value.innerIndexPtr(), data_bytes); + dest += data_bytes; + + // Values. + data_bytes = sizeof(Scalar) * header.num_non_zeros; + memcpy(dest, value.valuePtr(), data_bytes); + dest += data_bytes; + + return dest; + } + + EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src, + const uint8_t* end, + SparseMat& value) const { + if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr; + if (EIGEN_PREDICT_FALSE(src + sizeof(Header) > end)) return nullptr; + + const size_t header_bytes = sizeof(Header); + Header header; + EIGEN_USING_STD(memcpy) + memcpy(&header, src, header_bytes); + src += header_bytes; + + value.setZero(); + value.resize(header.size); + value.resizeNonZeros(header.num_non_zeros); + + // Inner indices. + std::size_t data_bytes = sizeof(StorageIndex) * header.num_non_zeros; + if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr; + memcpy(value.innerIndexPtr(), src, data_bytes); + src += data_bytes; + + // Values. 
+ data_bytes = sizeof(Scalar) * header.num_non_zeros; + if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr; + memcpy(value.valuePtr(), src, data_bytes); + src += data_bytes; + return src; + } +}; + } // end namespace Eigen #endif // EIGEN_SPARSEVECTOR_H diff --git a/libs/eigen/Eigen/src/SparseCore/SparseView.h b/libs/eigen/Eigen/src/SparseCore/SparseView.h index 92b3d1f..dbb4c43 100644 --- a/libs/eigen/Eigen/src/SparseCore/SparseView.h +++ b/libs/eigen/Eigen/src/SparseCore/SparseView.h @@ -11,6 +11,8 @@ #ifndef EIGEN_SPARSEVIEW_H #define EIGEN_SPARSEVIEW_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -45,11 +47,11 @@ template class SparseView : public SparseMatrixBase > { typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename internal::remove_all::type _MatrixTypeNested; + typedef internal::remove_all_t MatrixTypeNested_; typedef SparseMatrixBase Base; public: EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView) - typedef typename internal::remove_all::type NestedExpression; + typedef internal::remove_all_t NestedExpression; explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0), const RealScalar &epsilon = NumTraits::dummy_precision()) @@ -62,7 +64,7 @@ public: inline Index outerSize() const { return m_matrix.outerSize(); } /** \returns the nested expression */ - const typename internal::remove_all::type& + const internal::remove_all_t& nestedExpression() const { return m_matrix; } Scalar reference() const { return m_reference; } diff --git a/libs/eigen/Eigen/src/SparseCore/TriangularSolver.h b/libs/eigen/Eigen/src/SparseCore/TriangularSolver.h index f9c56ba..a9fbeeb 100644 --- a/libs/eigen/Eigen/src/SparseCore/TriangularSolver.h +++ b/libs/eigen/Eigen/src/SparseCore/TriangularSolver.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSETRIANGULARSOLVER_H #define EIGEN_SPARSETRIANGULARSOLVER_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -114,7 +116,7 @@ struct sparse_solve_triangular_selector for(Index i=0; i for(Index i=lhs.cols()-1; i>=0; --i) { Scalar& tmp = other.coeffRef(i,col); - if (tmp!=Scalar(0)) // optimization when other is actually sparse + if (!numext::is_exactly_zero(tmp)) // optimization when other is actually sparse { if(!(Mode & UnitDiag)) { @@ -182,11 +184,11 @@ void TriangularViewImpl::solveInPlace(MatrixBase::Flags & RowMajorBit }; - typedef typename internal::conditional::type, OtherDerived&>::type OtherCopy; + typedef std::conditional_t::type, OtherDerived&> OtherCopy; OtherCopy otherCopy(other.derived()); - internal::sparse_solve_triangular_selector::type, Mode>::run(derived().nestedExpression(), otherCopy); + internal::sparse_solve_triangular_selector, Mode>::run(derived().nestedExpression(), otherCopy); if (copy) other = otherCopy; @@ -239,7 +241,7 @@ struct sparse_solve_triangular_sparse_selector { tempVector.restart(); Scalar& ci = tempVector.coeffRef(i); - if (ci!=Scalar(0)) + if (!numext::is_exactly_zero(ci)) { // find typename Lhs::InnerIterator it(lhs, i); @@ -270,11 +272,11 @@ struct sparse_solve_triangular_sparse_selector } - Index count = 0; +// Index count = 0; // FIXME compute a reference value to filter zeros for (typename AmbiVector::Iterator it(tempVector/*,1e-12*/); it; ++it) { - ++ count; +// ++ count; // std::cerr << "fill " << it.index() << ", " << col << "\n"; // std::cout << it.value() << " "; // FIXME use insertBack @@ -299,8 +301,8 @@ void TriangularViewImpl::solveInPlace(SparseMatrixBa // enum { copy = internal::traits::Flags & RowMajorBit }; 
-// typedef typename internal::conditional::type, OtherDerived&>::type OtherCopy; +// typedef std::conditional_t::type, OtherDerived&> OtherCopy; // OtherCopy otherCopy(other.derived()); internal::sparse_solve_triangular_sparse_selector::run(derived().nestedExpression(), other.derived()); diff --git a/libs/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h b/libs/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h new file mode 100644 index 0000000..78ebfcc --- /dev/null +++ b/libs/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_SPARSELU_MODULE_H +#error "Please include Eigen/SparseLU instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU.h b/libs/eigen/Eigen/src/SparseLU/SparseLU.h index 0c8d893..1e69924 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU.h @@ -12,9 +12,11 @@ #ifndef EIGEN_SPARSE_LU_H #define EIGEN_SPARSE_LU_H +#include "./InternalHeaderCheck.h" + namespace Eigen { -template > class SparseLU; +template > class SparseLU; template struct SparseLUMatrixLReturnType; template struct SparseLUMatrixUReturnType; @@ -35,8 +37,8 @@ public: MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - SparseLUTransposeView() : m_sparseLU(NULL) {} - SparseLUTransposeView(const SparseLUTransposeView& view) { + SparseLUTransposeView() : APIBase(), m_sparseLU(NULL) {} + SparseLUTransposeView(const SparseLUTransposeView& view) : APIBase() { this->m_sparseLU = view.m_sparseLU; } void setIsInitialized(const bool isInitialized) {this->m_isInitialized = isInitialized;} @@ -119,25 +121,25 @@ private: * If this is the case for your matrices, you can try the basic scaling method at * "unsupported/Eigen/src/IterativeSolvers/Scaling.h" * - * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<> - * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLMAD + * \tparam MatrixType_ The type of the sparse matrix. It must be a column-major SparseMatrix<> + * \tparam OrderingType_ The ordering method to use, either AMD, COLAMD or METIS. 
Default is COLAMD * * \implsparsesolverconcept * * \sa \ref TutorialSparseSolverConcept * \sa \ref OrderingMethods_Module */ -template -class SparseLU : public SparseSolverBase >, public internal::SparseLUImpl +template +class SparseLU : public SparseSolverBase >, public internal::SparseLUImpl { protected: - typedef SparseSolverBase > APIBase; + typedef SparseSolverBase > APIBase; using APIBase::m_isInitialized; public: using APIBase::_solve_impl; - typedef _MatrixType MatrixType; - typedef _OrderingType OrderingType; + typedef MatrixType_ MatrixType; + typedef OrderingType_ OrderingType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::StorageIndex StorageIndex; @@ -197,9 +199,9 @@ class SparseLU : public SparseSolverBase >, * * \sa adjoint(), solve() */ - const SparseLUTransposeView > transpose() + const SparseLUTransposeView > transpose() { - SparseLUTransposeView > transposeView; + SparseLUTransposeView > transposeView; transposeView.setSparseLU(this); transposeView.setIsInitialized(this->m_isInitialized); return transposeView; @@ -218,9 +220,9 @@ class SparseLU : public SparseSolverBase >, * * \sa transpose(), solve() */ - const SparseLUTransposeView > adjoint() + const SparseLUTransposeView > adjoint() { - SparseLUTransposeView > adjointView; + SparseLUTransposeView > adjointView; adjointView.setSparseLU(this); adjointView.setIsInitialized(this->m_isInitialized); return adjointView; @@ -250,9 +252,9 @@ class SparseLU : public SparseSolverBase >, * y = b; matrixU().solveInPlace(y); * \endcode */ - SparseLUMatrixUReturnType > matrixU() const + SparseLUMatrixUReturnType > > matrixU() const { - return SparseLUMatrixUReturnType >(m_Lstore, m_Ustore); + return SparseLUMatrixUReturnType > >(m_Lstore, m_Ustore); } /** @@ -452,8 +454,8 @@ class SparseLU : public SparseSolverBase >, return (m_detPermR * m_detPermC) > 0 ? det : -det; } - Index nnzL() const { return m_nnzL; }; - Index nnzU() const { return m_nnzU; }; + Index nnzL() const { return m_nnzL; } + Index nnzU() const { return m_nnzU; } protected: // Functions @@ -474,7 +476,7 @@ class SparseLU : public SparseSolverBase >, std::string m_lastError; NCMatrix m_mat; // The input (permuted ) matrix SCMatrix m_Lstore; // The lower triangular matrix (supernodal) - MappedSparseMatrix m_Ustore; // The upper triangular matrix + Map> m_Ustore; // The upper triangular matrix PermutationType m_perm_c; // Column permutation PermutationType m_perm_r ; // Row permutation IndexVector m_etree; // Column elimination tree @@ -752,10 +754,13 @@ void SparseLU::factorize(const MatrixType& matrix) info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if ( info ) { - m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT "; + m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR"; +#ifndef EIGEN_NO_IO std::ostringstream returnInfo; - returnInfo << info; + returnInfo << " ...
ZERO COLUMN AT "; + returnInfo << info; m_lastError += returnInfo.str(); +#endif m_info = NumericalIssue; m_factorizationIsOk = false; return; @@ -789,7 +794,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Create supernode matrix L m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); // Create the column major upper sparse matrix U; - new (&m_Ustore) MappedSparseMatrix ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + new (&m_Ustore) Map> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); m_info = Success; m_factorizationIsOk = true; @@ -814,6 +819,31 @@ struct SparseLUMatrixLReturnType : internal::no_assignment_operator m_mapL.template solveTransposedInPlace(X); } + SparseMatrix toSparse() const { + ArrayXi colCount = ArrayXi::Ones(cols()); + for (Index i = 0; i < cols(); i++) { + typename MappedSupernodalType::InnerIterator iter(m_mapL, i); + for (; iter; ++iter) { + if (iter.row() > iter.col()) { + colCount(iter.col())++; + } + } + } + SparseMatrix sL(rows(), cols()); + sL.reserve(colCount); + for (Index i = 0; i < cols(); i++) { + sL.insert(i, i) = 1.0; + typename MappedSupernodalType::InnerIterator iter(m_mapL, i); + for (; iter; ++iter) { + if (iter.row() > iter.col()) { + sL.insert(iter.row(), iter.col()) = iter.value(); + } + } + } + sL.makeCompressed(); + return sL; + } + const MappedSupernodalType& m_mapL; }; @@ -830,7 +860,6 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator template void solveInPlace(MatrixBase &X) const { Index nrhs = X.cols(); - Index n = X.rows(); // Backward solve with U for (Index k = m_mapL.nsuper(); k >= 0; k--) { @@ -850,7 +879,7 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator { // FIXME: the following lines should use Block expressions and not Map! 
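// ---- editor's sketch (not part of the patch) --------------------------------
// This hunk resolves the FIXME above for the right-hand side: rather than aliasing
// rows [fsupc, fsupc+nsupc) of X through a hand-built strided Map, it takes the
// Block expression X.derived().middleRows(fsupc, nsupc), which carries the correct
// outer stride by construction. A standalone comparison of the two spellings
// (shapes chosen arbitrarily for illustration):
#include <Eigen/Dense>

inline void middle_rows_demo() {
  Eigen::MatrixXd X = Eigen::MatrixXd::Random(8, 3);
  const int fsupc = 2, nsupc = 4;

  // Block expression: stride bookkeeping is handled by Eigen.
  auto U = X.middleRows(fsupc, nsupc);

  // Raw Map equivalent for column-major data: same memory, manual OuterStride.
  Eigen::Map<Eigen::MatrixXd, 0, Eigen::OuterStride<>>
      Umap(&X(fsupc, 0), nsupc, X.cols(), Eigen::OuterStride<>(X.rows()));

  U.setZero();  // writes through the block are visible via the map, and vice versa
  (void)Umap;
}
// ------------------------------------------------------------------------------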
Map, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - Map< Matrix, 0, OuterStride<> > U (&(X.coeffRef(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc); U = A.template triangularView().solve(U); } @@ -873,7 +902,6 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator { using numext::conj; Index nrhs = X.cols(); - Index n = X.rows(); // Forward solve with U for (Index k = 0; k <= m_mapL.nsuper(); k++) { @@ -904,7 +932,7 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator else { Map, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - Map< Matrix, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc); if(Conjugate) U = A.adjoint().template triangularView().solve(U); else @@ -913,6 +941,32 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator }// End For U-solve } + SparseMatrix toSparse() { + ArrayXi rowCount = ArrayXi::Zero(rows()); + for (Index i = 0; i < cols(); i++) { + typename MatrixLType::InnerIterator iter(m_mapL, i); + for (; iter; ++iter) { + if (iter.row() <= iter.col()) { + rowCount(iter.row())++; + } + } + } + + SparseMatrix sU(rows(), cols()); + sU.reserve(rowCount); + for (Index i = 0; i < cols(); i++) { + typename MatrixLType::InnerIterator iter(m_mapL, i); + for (; iter; ++iter) { + if (iter.row() <= iter.col()) { + sU.insert(iter.row(), iter.col()) = iter.value(); + } + } + } + sU.makeCompressed(); + const SparseMatrix u = m_mapU; // convert to RowMajor + sU += u; + return sU; + } const MatrixLType& m_mapL; const MatrixUType& m_mapU; diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLUImpl.h b/libs/eigen/Eigen/src/SparseLU/SparseLUImpl.h index fc0cfc4..daec837 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLUImpl.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLUImpl.h @@ -9,6 +9,8 @@ #ifndef SPARSELU_IMPL_H #define SPARSELU_IMPL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_Memory.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_Memory.h index 349bfd5..798745f 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_Memory.h @@ -31,6 +31,8 @@ #ifndef EIGEN_SPARSELU_MEMORY #define EIGEN_SPARSELU_MEMORY +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_Structs.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_Structs.h index cf5ec44..3ab0c72 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_Structs.h @@ -68,10 +68,12 @@ #ifndef EIGEN_LU_STRUCTS #define EIGEN_LU_STRUCTS +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { - -typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; + +enum MemType {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL}; template struct LU_GlobalLU_t { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h index 0be293d..adfc63a 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h @@ -11,6 +11,8 @@ #ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H #define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace 
internal { @@ -29,12 +31,12 @@ namespace internal { * SuperInnerIterator to iterate through all supernodes * Function for triangular solve */ -template +template class MappedSuperNodalMatrix { public: - typedef _Scalar Scalar; - typedef _StorageIndex StorageIndex; + typedef Scalar_ Scalar; + typedef StorageIndex_ StorageIndex; typedef Matrix IndexVector; typedef Matrix ScalarVector; public: @@ -274,9 +276,8 @@ void MappedSuperNodalMatrix::solveInPlace( MatrixBase&X) co // Triangular solve Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - Map< Matrix, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); - U = A.template triangularView().solve(U); - + typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc); + U = A.template triangularView().solve(U); // Matrix-vector product new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); work.topRows(nrow).noalias() = A * U; @@ -349,7 +350,7 @@ void MappedSuperNodalMatrix::solveTransposedInPlace( MatrixBase, 0, OuterStride<> > A( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); - Map< Matrix, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc); if(Conjugate) U = U - A.adjoint() * work.topRows(nrow); else diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_Utils.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_Utils.h index 9e3dab4..e399fed 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_Utils.h @@ -11,6 +11,8 @@ #ifndef EIGEN_SPARSELU_UTILS_H #define EIGEN_SPARSELU_UTILS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h index b57f068..d5c29b3 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -31,6 +31,8 @@ #ifndef SPARSELU_COLUMN_BMOD_H #define SPARSELU_COLUMN_BMOD_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h index 5a2c941..be4cfd1 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -31,6 +31,8 @@ #define SPARSELU_COLUMN_DFS_H template class SparseLUImpl; +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index c32d8d8..e06b2a0 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -29,6 +29,8 @@ #ifndef SPARSELU_COPY_TO_UCOL_H #define SPARSELU_COPY_TO_UCOL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h index e37c2fe..034d379 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SPARSELU_GEMM_KERNEL_H #define EIGEN_SPARSELU_GEMM_KERNEL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h 
b/libs/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 6f75d50..2a8d80b 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -28,6 +28,8 @@ #ifndef SPARSELU_HEAP_RELAX_SNODE_H #define SPARSELU_HEAP_RELAX_SNODE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -75,8 +77,6 @@ void SparseLUImpl::heap_relax_snode (const Index n, IndexVe // Identify the relaxed supernodes by postorder traversal of the etree Index snode_start; // beginning of a snode StorageIndex k; - Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree - Index nsuper_et = 0; // Number of relaxed snodes in the original etree StorageIndex l; for (j = 0; j < n; ) { @@ -88,7 +88,6 @@ void SparseLUImpl::heap_relax_snode (const Index n, IndexVe parent = et(j); } // Found a supernode in postordered etree, j is the last column - ++nsuper_et_post; k = StorageIndex(n); for (Index i = snode_start; i <= j; ++i) k = (std::min)(k, inv_post(i)); @@ -97,7 +96,6 @@ void SparseLUImpl::heap_relax_snode (const Index n, IndexVe { // This is also a supernode in the original etree relax_end(k) = l; // Record last column - ++nsuper_et; } else { @@ -107,7 +105,6 @@ void SparseLUImpl::heap_relax_snode (const Index n, IndexVe if (descendants(i) == 0) { relax_end(l) = l; - ++nsuper_et; } } } diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index 8c1b3e8..424f93c 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -11,6 +11,8 @@ #ifndef SPARSELU_KERNEL_BMOD_H #define SPARSELU_KERNEL_BMOD_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h index f052001..8cd331a 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -31,6 +31,8 @@ #ifndef SPARSELU_PANEL_BMOD_H #define SPARSELU_PANEL_BMOD_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 155df73..c3ff013 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -30,6 +30,8 @@ #ifndef SPARSELU_PANEL_DFS_H #define SPARSELU_PANEL_DFS_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h index a86dac9..6daed91 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -30,6 +30,8 @@ #ifndef SPARSELU_PIVOTL_H #define SPARSELU_PIVOTL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h b/libs/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h index ad32fed..e5da73b 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -30,6 +30,8 @@ #ifndef SPARSELU_PRUNEL_H #define SPARSELU_PRUNEL_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h 
b/libs/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h index c408d01..ed79532 100644 --- a/libs/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +++ b/libs/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -28,6 +28,8 @@ #ifndef SPARSELU_RELAX_SNODE_H #define SPARSELU_RELAX_SNODE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { diff --git a/libs/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h b/libs/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h new file mode 100644 index 0000000..0564e93 --- /dev/null +++ b/libs/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_SPARSEQR_MODULE_H +#error "Please include Eigen/SparseQR instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/SparseQR/SparseQR.h b/libs/eigen/Eigen/src/SparseQR/SparseQR.h index d1fb96f..f825092 100644 --- a/libs/eigen/Eigen/src/SparseQR/SparseQR.h +++ b/libs/eigen/Eigen/src/SparseQR/SparseQR.h @@ -11,6 +11,8 @@ #ifndef EIGEN_SPARSE_QR_H #define EIGEN_SPARSE_QR_H +#include "./InternalHeaderCheck.h" + namespace Eigen { template class SparseQR; @@ -59,8 +61,8 @@ namespace internal { * R is the sparse triangular or trapezoidal matrix. The latter occurs when A is rank-deficient. * matrixR().topLeftCorner(rank(), rank()) always returns a triangular factor of full rank. * - * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> - * \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module + * \tparam MatrixType_ The type of the sparse matrix A, must be a column-major SparseMatrix<> + * \tparam OrderingType_ The fill-reducing ordering method. See the \link OrderingMethods_Module * OrderingMethods \endlink module for the list of built-in and external ordering methods. * * \implsparsesolverconcept @@ -80,16 +82,16 @@ namespace internal { * \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix. * */ -template -class SparseQR : public SparseSolverBase > +template +class SparseQR : public SparseSolverBase > { protected: - typedef SparseSolverBase > Base; + typedef SparseSolverBase > Base; using Base::m_isInitialized; public: using Base::_solve_impl; - typedef _MatrixType MatrixType; - typedef _OrderingType OrderingType; + typedef MatrixType_ MatrixType; + typedef OrderingType_ OrderingType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::StorageIndex StorageIndex; @@ -321,7 +323,7 @@ void SparseQR::analyzePattern(const MatrixType& mat) { eigen_assert(mat.isCompressed() && "SparseQR requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to SparseQR"); // Copy to a column major matrix if the input is rowmajor - typename internal::conditional::type matCpy(mat); + std::conditional_t matCpy(mat); // Compute the column fill reducing ordering OrderingType ord; ord(matCpy, m_perm_c); diff --git a/libs/eigen/Eigen/src/StlSupport/StdDeque.h b/libs/eigen/Eigen/src/StlSupport/StdDeque.h index 6d47e75..1e95182 100644 --- a/libs/eigen/Eigen/src/StlSupport/StdDeque.h +++ b/libs/eigen/Eigen/src/StlSupport/StdDeque.h @@ -11,6 +11,10 @@ #ifndef EIGEN_STDDEQUE_H #define EIGEN_STDDEQUE_H +#ifndef EIGEN_STDDEQUE_MODULE_H +#error "Please include Eigen/StdDeque instead of including this file directly."
+#endif + #include "details.h" /** @@ -44,73 +48,4 @@ namespace std \ }; \ } -// check whether we really need the std::deque specialization -#if !EIGEN_HAS_CXX11_CONTAINERS && !(defined(_GLIBCXX_DEQUE) && (!EIGEN_GNUC_AT_LEAST(4,1))) /* Note that before gcc-4.1 we already have: std::deque::resize(size_type,const T&). */ - -namespace std { - -#define EIGEN_STD_DEQUE_SPECIALIZATION_BODY \ - public: \ - typedef T value_type; \ - typedef typename deque_base::allocator_type allocator_type; \ - typedef typename deque_base::size_type size_type; \ - typedef typename deque_base::iterator iterator; \ - typedef typename deque_base::const_iterator const_iterator; \ - explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \ - template \ - deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \ - : deque_base(first, last, a) {} \ - deque(const deque& c) : deque_base(c) {} \ - explicit deque(size_type num, const value_type& val = value_type()) : deque_base(num, val) {} \ - deque(iterator start_, iterator end_) : deque_base(start_, end_) {} \ - deque& operator=(const deque& x) { \ - deque_base::operator=(x); \ - return *this; \ - } - - template - class deque > - : public deque > -{ - typedef deque > deque_base; - EIGEN_STD_DEQUE_SPECIALIZATION_BODY - - void resize(size_type new_size) - { resize(new_size, T()); } - -#if defined(_DEQUE_) - // workaround MSVC std::deque implementation - void resize(size_type new_size, const value_type& x) - { - if (deque_base::size() < new_size) - deque_base::_Insert_n(deque_base::end(), new_size - deque_base::size(), x); - else if (new_size < deque_base::size()) - deque_base::erase(deque_base::begin() + new_size, deque_base::end()); - } - void push_back(const value_type& x) - { deque_base::push_back(x); } - void push_front(const value_type& x) - { deque_base::push_front(x); } - using deque_base::insert; - iterator insert(const_iterator position, const value_type& x) - { return deque_base::insert(position,x); } - void insert(const_iterator position, size_type new_size, const value_type& x) - { deque_base::insert(position, new_size, x); } -#else - // default implementation which should always work. - void resize(size_type new_size, const value_type& x) - { - if (new_size < deque_base::size()) - deque_base::erase(deque_base::begin() + new_size, deque_base::end()); - else if (new_size > deque_base::size()) - deque_base::insert(deque_base::end(), new_size - deque_base::size(), x); - } -#endif - }; -} - -#endif // check whether specialization is actually required - #endif // EIGEN_STDDEQUE_H diff --git a/libs/eigen/Eigen/src/StlSupport/StdList.h b/libs/eigen/Eigen/src/StlSupport/StdList.h index 8ba3fad..da36677 100644 --- a/libs/eigen/Eigen/src/StlSupport/StdList.h +++ b/libs/eigen/Eigen/src/StlSupport/StdList.h @@ -10,6 +10,10 @@ #ifndef EIGEN_STDLIST_H #define EIGEN_STDLIST_H +#ifndef EIGEN_STDLIST_MODULE_H +#error "Please include Eigen/StdList instead of including this file directly." +#endif + #include "details.h" /** @@ -43,64 +47,4 @@ namespace std \ }; \ } -// check whether we really need the std::list specialization -#if !EIGEN_HAS_CXX11_CONTAINERS && !(defined(_GLIBCXX_LIST) && (!EIGEN_GNUC_AT_LEAST(4,1))) /* Note that before gcc-4.1 we already have: std::list::resize(size_type,const T&). 
*/ - -namespace std -{ - -#define EIGEN_STD_LIST_SPECIALIZATION_BODY \ - public: \ - typedef T value_type; \ - typedef typename list_base::allocator_type allocator_type; \ - typedef typename list_base::size_type size_type; \ - typedef typename list_base::iterator iterator; \ - typedef typename list_base::const_iterator const_iterator; \ - explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \ - template \ - list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \ - : list_base(first, last, a) {} \ - list(const list& c) : list_base(c) {} \ - explicit list(size_type num, const value_type& val = value_type()) : list_base(num, val) {} \ - list(iterator start_, iterator end_) : list_base(start_, end_) {} \ - list& operator=(const list& x) { \ - list_base::operator=(x); \ - return *this; \ - } - - template - class list > - : public list > - { - typedef list > list_base; - EIGEN_STD_LIST_SPECIALIZATION_BODY - - void resize(size_type new_size) - { resize(new_size, T()); } - - void resize(size_type new_size, const value_type& x) - { - if (list_base::size() < new_size) - list_base::insert(list_base::end(), new_size - list_base::size(), x); - else - while (new_size < list_base::size()) list_base::pop_back(); - } - -#if defined(_LIST_) - // workaround MSVC std::list implementation - void push_back(const value_type& x) - { list_base::push_back(x); } - using list_base::insert; - iterator insert(const_iterator position, const value_type& x) - { return list_base::insert(position,x); } - void insert(const_iterator position, size_type new_size, const value_type& x) - { list_base::insert(position, new_size, x); } -#endif - }; -} - -#endif // check whether specialization is actually required - #endif // EIGEN_STDLIST_H diff --git a/libs/eigen/Eigen/src/StlSupport/StdVector.h b/libs/eigen/Eigen/src/StlSupport/StdVector.h index 9fcf19b..02dfb39 100644 --- a/libs/eigen/Eigen/src/StlSupport/StdVector.h +++ b/libs/eigen/Eigen/src/StlSupport/StdVector.h @@ -11,6 +11,10 @@ #ifndef EIGEN_STDVECTOR_H #define EIGEN_STDVECTOR_H +#ifndef EIGEN_STDVECTOR_MODULE_H +#error "Please include Eigen/StdVector instead of including this file directly." 
+#endif + #include "details.h" /** @@ -44,88 +48,4 @@ namespace std \ }; \ } -// Don't specialize if containers are implemented according to C++11 -#if !EIGEN_HAS_CXX11_CONTAINERS - -namespace std { - -#define EIGEN_STD_VECTOR_SPECIALIZATION_BODY \ - public: \ - typedef T value_type; \ - typedef typename vector_base::allocator_type allocator_type; \ - typedef typename vector_base::size_type size_type; \ - typedef typename vector_base::iterator iterator; \ - typedef typename vector_base::const_iterator const_iterator; \ - explicit vector(const allocator_type& a = allocator_type()) : vector_base(a) {} \ - template \ - vector(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \ - : vector_base(first, last, a) {} \ - vector(const vector& c) : vector_base(c) {} \ - explicit vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \ - vector(iterator start_, iterator end_) : vector_base(start_, end_) {} \ - vector& operator=(const vector& x) { \ - vector_base::operator=(x); \ - return *this; \ - } - - template - class vector > - : public vector > -{ - typedef vector > vector_base; - EIGEN_STD_VECTOR_SPECIALIZATION_BODY - - void resize(size_type new_size) - { resize(new_size, T()); } - -#if defined(_VECTOR_) - // workaround MSVC std::vector implementation - void resize(size_type new_size, const value_type& x) - { - if (vector_base::size() < new_size) - vector_base::_Insert_n(vector_base::end(), new_size - vector_base::size(), x); - else if (new_size < vector_base::size()) - vector_base::erase(vector_base::begin() + new_size, vector_base::end()); - } - void push_back(const value_type& x) - { vector_base::push_back(x); } - using vector_base::insert; - iterator insert(const_iterator position, const value_type& x) - { return vector_base::insert(position,x); } - void insert(const_iterator position, size_type new_size, const value_type& x) - { vector_base::insert(position, new_size, x); } -#elif defined(_GLIBCXX_VECTOR) && (!(EIGEN_GNUC_AT_LEAST(4,1))) - /* Note that before gcc-4.1 we already have: std::vector::resize(size_type,const T&). - * However, this specialization is still needed to make the above EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION trick to work. */ - void resize(size_type new_size, const value_type& x) - { - vector_base::resize(new_size,x); - } -#elif defined(_GLIBCXX_VECTOR) && EIGEN_GNUC_AT_LEAST(4,2) - // workaround GCC std::vector implementation - void resize(size_type new_size, const value_type& x) - { - if (new_size < vector_base::size()) - vector_base::_M_erase_at_end(this->_M_impl._M_start + new_size); - else - vector_base::insert(vector_base::end(), new_size - vector_base::size(), x); - } -#else - // either GCC 4.1 or non-GCC - // default implementation which should always work. 
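// ---------------------------------------------------------------------------
// [editor's aside] The std::vector/deque/list specializations being deleted in
// this patch existed only to paper over pre-C++11 container implementations;
// the removed "#if !EIGEN_HAS_CXX11_CONTAINERS" guards say as much. With C++11
// containers, the long-documented supported pattern is a plain standard
// container with Eigen::aligned_allocator. A minimal hedged sketch (function
// name is illustrative, not part of this patch):
#include <vector>
#include <Eigen/Dense>

inline int example_aligned_vector() {
  std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > v(3);
  v.resize(5, Eigen::Vector4f::Zero());  // resize(n, value) works out of the box
  return static_cast<int>(v.size());
}
// ---------------------------------------------------------------------------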
- void resize(size_type new_size, const value_type& x) - { - if (new_size < vector_base::size()) - vector_base::erase(vector_base::begin() + new_size, vector_base::end()); - else if (new_size > vector_base::size()) - vector_base::insert(vector_base::end(), new_size - vector_base::size(), x); - } -#endif - }; -} -#endif // !EIGEN_HAS_CXX11_CONTAINERS - - #endif // EIGEN_STDVECTOR_H diff --git a/libs/eigen/Eigen/src/StlSupport/details.h b/libs/eigen/Eigen/src/StlSupport/details.h index 2cfd13e..29fd871 100644 --- a/libs/eigen/Eigen/src/StlSupport/details.h +++ b/libs/eigen/Eigen/src/StlSupport/details.h @@ -52,11 +52,7 @@ namespace Eigen { // in std::vector::resize(size_t s,T x) won't be aligned and generate an error // even if this function is never called. Whence this little wrapper. #define EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T) \ - typename Eigen::internal::conditional< \ - Eigen::internal::is_arithmetic::value, \ - T, \ - Eigen::internal::workaround_msvc_stl_support \ - >::type + std::conditional_t::value, T, Eigen::internal::workaround_msvc_stl_support > namespace internal { template struct workaround_msvc_stl_support : public T diff --git a/libs/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h b/libs/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h new file mode 100644 index 0000000..94a62b5 --- /dev/null +++ b/libs/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H +#error "Please include Eigen/SuperLUSupport instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h b/libs/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h index d1d3ad7..4bac22d 100644 --- a/libs/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/libs/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -10,6 +10,8 @@ #ifndef EIGEN_SUPERLUSUPPORT_H #define EIGEN_SUPERLUSUPPORT_H +#include "./InternalHeaderCheck.h" + namespace Eigen { #if defined(SUPERLU_MAJOR_VERSION) && (SUPERLU_MAJOR_VERSION >= 5) @@ -295,14 +297,14 @@ SluMatrix asSluMatrix(MatrixType& mat) /** View a Super LU matrix as an Eigen expression */ template -MappedSparseMatrix map_superlu(SluMatrix& sluMat) +Map > map_superlu(SluMatrix& sluMat) { eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR) || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC)); Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow; - return MappedSparseMatrix( + return Map >( sluMat.nrow, sluMat.ncol, sluMat.storage.outerInd[outerSize], sluMat.storage.outerInd, sluMat.storage.innerInd, reinterpret_cast(sluMat.storage.values) ); } @@ -313,7 +315,7 @@ MappedSparseMatrix map_superlu(SluMatrix& sluMat) * \class SuperLUBase * \brief The base class for the direct and incomplete LU factorization of SuperLU */ -template +template class SuperLUBase : public SparseSolverBase { protected: @@ -321,7 +323,7 @@ class SuperLUBase : public SparseSolverBase using Base::derived; using Base::m_isInitialized; public: - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::StorageIndex StorageIndex; @@ -476,7 +478,7 @@ class SuperLUBase : public SparseSolverBase * using the SuperLU library. The sparse matrix A must be squared and invertible. The vectors or matrices * X and B can be either dense or sparse. 
* - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> * * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. * @@ -484,12 +486,12 @@ class SuperLUBase : public SparseSolverBase * * \sa \ref TutorialSparseSolverConcept, class SparseLU */ -template -class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > +template +class SuperLU : public SuperLUBase > { public: - typedef SuperLUBase<_MatrixType,SuperLU> Base; - typedef _MatrixType MatrixType; + typedef SuperLUBase Base; + typedef MatrixType_ MatrixType; typedef typename Base::Scalar Scalar; typedef typename Base::RealScalar RealScalar; typedef typename Base::StorageIndex StorageIndex; @@ -830,19 +832,19 @@ typename SuperLU::Scalar SuperLU::determinant() const * * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept * * \sa \ref TutorialSparseSolverConcept, class IncompleteLUT, class ConjugateGradient, class BiCGSTAB */ -template -class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> > +template +class SuperILU : public SuperLUBase > { public: - typedef SuperLUBase<_MatrixType,SuperILU> Base; - typedef _MatrixType MatrixType; + typedef SuperLUBase Base; + typedef MatrixType_ MatrixType; typedef typename Base::Scalar Scalar; typedef typename Base::RealScalar RealScalar; diff --git a/libs/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h b/libs/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h new file mode 100644 index 0000000..64112f1 --- /dev/null +++ b/libs/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H +#error "Please include Eigen/UmfPackSupport instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h b/libs/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h index e3a333f..d9a8d38 100644 --- a/libs/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/libs/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -20,6 +20,8 @@ #endif #endif +#include "./InternalHeaderCheck.h" + namespace Eigen { /* TODO extract L, extract U, compute det, etc... */ @@ -278,21 +280,21 @@ inline SuiteSparse_long umfpack_get_determinant(std::complex *Mx, double * * \warning The input matrix A should be in a \b compressed and \b column-major form. * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix. 
- * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept * * \sa \ref TutorialSparseSolverConcept, class SparseLU */ -template -class UmfPackLU : public SparseSolverBase > +template +class UmfPackLU : public SparseSolverBase > { protected: - typedef SparseSolverBase > Base; + typedef SparseSolverBase > Base; using Base::m_isInitialized; public: using Base::_solve_impl; - typedef _MatrixType MatrixType; + typedef MatrixType_ MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::StorageIndex StorageIndex; @@ -529,16 +531,16 @@ class UmfPackLU : public SparseSolverBase > template void grab(const EigenBase &A) { - mp_matrix.~UmfpackMatrixRef(); - ::new (&mp_matrix) UmfpackMatrixRef(A.derived()); + internal::destroy_at(&mp_matrix); + internal::construct_at(&mp_matrix, A.derived()); } void grab(const UmfpackMatrixRef &A) { if(&(A.derived()) != &mp_matrix) { - mp_matrix.~UmfpackMatrixRef(); - ::new (&mp_matrix) UmfpackMatrixRef(A); + internal::destroy_at(&mp_matrix); + internal::construct_at(&mp_matrix, A); } } diff --git a/libs/eigen/Eigen/src/misc/Image.h b/libs/eigen/Eigen/src/misc/Image.h index b8b8a04..b500036 100644 --- a/libs/eigen/Eigen/src/misc/Image.h +++ b/libs/eigen/Eigen/src/misc/Image.h @@ -10,6 +10,8 @@ #ifndef EIGEN_MISC_IMAGE_H #define EIGEN_MISC_IMAGE_H +#include "./InternalHeaderCheck.h" + namespace Eigen { namespace internal { @@ -32,10 +34,10 @@ struct traits > > ReturnType; }; -template struct image_retval_base - : public ReturnByValue > +template struct image_retval_base + : public ReturnByValue > { - typedef _DecompositionType DecompositionType; + typedef DecompositionType_ DecompositionType; typedef typename DecompositionType::MatrixType MatrixType; typedef ReturnByValue Base; diff --git a/libs/eigen/Eigen/src/misc/InternalHeaderCheck.h b/libs/eigen/Eigen/src/misc/InternalHeaderCheck.h new file mode 100644 index 0000000..1cea572 --- /dev/null +++ b/libs/eigen/Eigen/src/misc/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CORE_MODULE_H +#error "Please include Eigen/Core instead of including headers inside the src directory directly." 
+#endif
diff --git a/libs/eigen/Eigen/src/misc/Kernel.h b/libs/eigen/Eigen/src/misc/Kernel.h
index bef5d6f..7abfbb7 100644
--- a/libs/eigen/Eigen/src/misc/Kernel.h
+++ b/libs/eigen/Eigen/src/misc/Kernel.h
@@ -10,6 +10,8 @@
 #ifndef EIGEN_MISC_KERNEL_H
 #define EIGEN_MISC_KERNEL_H
 
+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {
 
 namespace internal {
@@ -34,10 +36,10 @@ struct traits<kernel_retval_base<DecompositionType> >
         > ReturnType;
 };
 
-template<typename _DecompositionType> struct kernel_retval_base
- : public ReturnByValue<kernel_retval_base<_DecompositionType> >
+template<typename DecompositionType_> struct kernel_retval_base
+ : public ReturnByValue<kernel_retval_base<DecompositionType_> >
 {
-  typedef _DecompositionType DecompositionType;
+  typedef DecompositionType_ DecompositionType;
   typedef ReturnByValue<kernel_retval_base> Base;
 
   explicit kernel_retval_base(const DecompositionType& dec)
diff --git a/libs/eigen/Eigen/src/misc/RealSvd2x2.h b/libs/eigen/Eigen/src/misc/RealSvd2x2.h
index abb4d3c..5dd75f3 100644
--- a/libs/eigen/Eigen/src/misc/RealSvd2x2.h
+++ b/libs/eigen/Eigen/src/misc/RealSvd2x2.h
@@ -11,6 +11,8 @@
 #ifndef EIGEN_REALSVD2X2_H
 #define EIGEN_REALSVD2X2_H
 
+#include "./InternalHeaderCheck.h"
+
 namespace Eigen {
 
 namespace internal {
diff --git a/libs/eigen/Eigen/src/misc/lapacke.h b/libs/eigen/Eigen/src/misc/lapacke.h
old mode 100755
new mode 100644
diff --git a/libs/eigen/Eigen/src/misc/lapacke_helpers.h b/libs/eigen/Eigen/src/misc/lapacke_helpers.h
new file mode 100644
index 0000000..b6ad6e8
--- /dev/null
+++ b/libs/eigen/Eigen/src/misc/lapacke_helpers.h
@@ -0,0 +1,160 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2021 Erik Schultheis
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LAPACKE_HELPERS_H
+#define EIGEN_LAPACKE_HELPERS_H
+
+#include "./InternalHeaderCheck.h"
+
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
+#include "lapacke.h"
+#endif
+
+namespace Eigen {
+namespace internal {
+/**
+ * \internal
+ * \brief Implementation details and helper functions for the lapacke glue code.
+ */
+namespace lapacke_helpers {
+
+// ---------------------------------------------------------------------------------------------------------------------
+// Translation from Eigen to Lapacke for types and constants
+// ---------------------------------------------------------------------------------------------------------------------
+
+// For complex numbers, the types in Eigen and Lapacke are different, but layout compatible.
+template<typename Scalar>
+struct translate_type_imp;
+template<>
+struct translate_type_imp<float> {
+  using type = float;
+};
+template<>
+struct translate_type_imp<double> {
+  using type = double;
+};
+template<>
+struct translate_type_imp<std::complex<double>> {
+  using type = lapack_complex_double;
+};
+template<>
+struct translate_type_imp<std::complex<float>> {
+  using type = lapack_complex_float;
+};
+
+/// Given an Eigen type, this is defined to be the corresponding, layout-compatible lapack type
+template<typename Scalar>
+using translated_type = typename translate_type_imp<Scalar>::type;
+
+/// These functions convert their arguments from Eigen to Lapack types
+/// This function performs conversion for any of the translations defined above.
+template<typename Source, typename Target = translated_type<Source>>
+EIGEN_ALWAYS_INLINE auto to_lapack(Source value) { return static_cast<Target>(value); }
+
+/// This function performs conversions for pointer types corresponding to the translations above.
+/// This is valid because the translations are between layout-compatible types.
+template<typename Source, typename Target = translated_type<Source>>
+EIGEN_ALWAYS_INLINE auto to_lapack(Source *value) { return reinterpret_cast<Target*>(value); }
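// ---------------------------------------------------------------------------
// [editor's aside] A standalone sketch of the layout-compatibility idiom that
// to_lapack relies on above. 'toy_lapack_complex' and 'toy_to_lapack' are
// hypothetical names standing in for lapack_complex_float; they are not part
// of this patch.
#include <complex>
#include <cassert>

struct toy_lapack_complex { float re, im; };  // same layout as std::complex<float>

inline toy_lapack_complex* toy_to_lapack(std::complex<float>* p) {
  // Only valid because the two types are layout compatible; this is the exact
  // contract the LAPACKE C interface relies on.
  return reinterpret_cast<toy_lapack_complex*>(p);
}

inline void toy_demo() {
  std::complex<float> z(1.0f, 2.0f);
  toy_lapack_complex* c = toy_to_lapack(&z);
  assert(c->re == 1.0f && c->im == 2.0f);
}
// ---------------------------------------------------------------------------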
+
+/// This function converts the Eigen Index to a lapack index, with possible range checks
+/// \sa internal::convert_index
+EIGEN_ALWAYS_INLINE lapack_int to_lapack(Index index) {
+  return convert_index<lapack_int>(index);
+}
+
+/// translates storage order of the given Eigen object to the corresponding lapack constant
+template<typename Derived>
+EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR lapack_int lapack_storage_of(const EigenBase<Derived> &) {
+  return Derived::IsRowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR;
+}
+
+/// translate UpLo type to the corresponding letter code
+template<UpLoType mode> char translate_mode;
+template<> constexpr char translate_mode<Lower> = 'L';
+template<> constexpr char translate_mode<Upper> = 'U';
+
+
+// ---------------------------------------------------------------------------------------------------------------------
+// Automatic generation of low-level wrappers
+// ---------------------------------------------------------------------------------------------------------------------
+
+/*!
+ * \internal
+ * \brief Helper type to facilitate the wrapping of raw LAPACKE functions for different types into a single, overloaded C++ function.
+ * This is achieved in combination with \ref EIGEN_MAKE_LAPACKE_WRAPPER
+ * \details This implementation works by providing an overloaded call function that just forwards its arguments to the
+ * underlying lapack function. Each of these overloads is enabled only if the call is actually well formed.
+ * Because these lapack functions take pointers to the underlying scalar type as arguments, even though the actual Scalars
+ * would be implicitly convertible, the pointers are not and therefore only a single overload can be valid at the same time.
+ * Thus, despite all functions taking fully generic `Args&&... args` as arguments, there is never any ambiguity.
+ */
+template<typename DoubleFn, typename SingleFn, typename DoubleCpxFn, typename SingleCpxFn>
+struct WrappingHelper {
+  // The naming of double, single, double complex and single complex is purely for readability
+  // and doesn't actually affect the workings of this class. In principle, the arguments can
+  // be supplied in any permuted order.
+  DoubleFn double_; SingleFn single_; DoubleCpxFn double_cpx_; SingleCpxFn single_cpx_;
+
+  template<typename... Args>
+  auto call(Args&&... args) -> decltype(double_(std::forward<Args>(args)...)) {
+    return double_(std::forward<Args>(args)...);
+  }
+
+  template<typename... Args>
+  auto call(Args&&... args) -> decltype(single_(std::forward<Args>(args)...)){
+    return single_(std::forward<Args>(args)...);
+  }
+
+  template<typename... Args>
+  auto call(Args&&... args) -> decltype(double_cpx_(std::forward<Args>(args)...)){
+    return double_cpx_(std::forward<Args>(args)...);
+  }
+
+  template<typename... Args>
+  auto call(Args&&... args) -> decltype(single_cpx_(std::forward<Args>(args)...)){
+    return single_cpx_(std::forward<Args>(args)...);
+  }
+};
+
+/** \internal Helper function that generates a `WrappingHelper` object with the given function pointers and
+ * invokes its `call` method, thus selecting one of the overloads.
+ * \sa EIGEN_MAKE_LAPACKE_WRAPPER
+ */
+template<typename DoubleFn, typename SingleFn, typename DoubleCpxFn, typename SingleCpxFn, typename... Args>
+EIGEN_ALWAYS_INLINE auto call_wrapper(DoubleFn df, SingleFn sf, DoubleCpxFn dcf, SingleCpxFn scf, Args&&... args) {
+  WrappingHelper<DoubleFn, SingleFn, DoubleCpxFn, SingleCpxFn> helper{df, sf, dcf, scf};
+  return helper.call(std::forward<Args>(args)...);
+}
+
+/**
+ * \internal
+ * Generates a new function `Function` that dispatches to the corresponding LAPACKE_? prefixed functions.
+ * \sa WrappingHelper
+ */
+#define EIGEN_MAKE_LAPACKE_WRAPPER(FUNCTION) \
+template<typename... Args> \
+EIGEN_ALWAYS_INLINE auto FUNCTION(Args&&... args) { return call_wrapper(LAPACKE_d##FUNCTION, LAPACKE_s##FUNCTION, LAPACKE_z##FUNCTION, LAPACKE_c##FUNCTION, std::forward<Args>(args)...); }
+
+// Now with this macro and the helper wrappers, we can generate the dispatch for all the lapacke functions that are
+// used in Eigen.
+// We define these here instead of in the files where they are used because this allows us to #undef the macro again
+// right here
+EIGEN_MAKE_LAPACKE_WRAPPER(potrf)
+EIGEN_MAKE_LAPACKE_WRAPPER(getrf)
+EIGEN_MAKE_LAPACKE_WRAPPER(geqrf)
+EIGEN_MAKE_LAPACKE_WRAPPER(gesdd)
+
+#undef EIGEN_MAKE_LAPACKE_WRAPPER
+}
+}
+}
+
+#endif // EIGEN_LAPACKE_HELPERS_H
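// ---------------------------------------------------------------------------
// [editor's aside] How the dispatch generated above is meant to be consumed.
// This is a hedged sketch in comment form only (the real callers live in
// Eigen's LAPACKE-backed decompositions, which are not part of this hunk).
// With the wrappers in scope, one spelling covers all four scalar types,
// e.g. for a Cholesky factorization of a column-major double matrix:
//
//   Eigen::MatrixXd A = ...;                       // also works for float/complex
//   lapack_int n    = to_lapack(A.rows());
//   lapack_int lda  = to_lapack(A.outerStride());
//   lapack_int info = potrf(lapack_storage_of(A), 'L', n, to_lapack(A.data()), lda);
//
// Overload resolution selects LAPACKE_dpotrf here purely because only the
// double* overload of WrappingHelper::call is well formed for these pointers.
// ---------------------------------------------------------------------------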
diff --git a/libs/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/libs/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h
index 0e5d544..30e3ee1 100644
--- a/libs/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h
+++ b/libs/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h
@@ -30,15 +30,27 @@ operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
  *
  * \sa max()
  */
-EIGEN_MAKE_CWISE_BINARY_OP(min,min)
+template<int NaNPropagation=PropagateFast, typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar,NaNPropagation>, const Derived, const OtherDerived>
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+min
+#else
+(min)
+#endif
+(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar,NaNPropagation>, const Derived, const OtherDerived>(derived(), other.derived());
+}
 
 /** \returns an expression of the coefficient-wise min of \c *this and scalar \a other
  *
  * \sa max()
  */
+template<int NaNPropagation=PropagateFast>
 EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived,
-                                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar,NaNPropagation>, const Derived,
+                                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
 #ifdef EIGEN_PARSED_BY_DOXYGEN
 min
 #else
@@ -46,7 +58,7 @@ min
 #endif
 (const Scalar &other) const
 {
-  return (min)(Derived::PlainObject::Constant(rows(), cols(), other));
+  return (min)<NaNPropagation>(Derived::PlainObject::Constant(rows(), cols(), other));
 }
 
 /** \returns an expression of the coefficient-wise max of \c *this and \a other
@@ -56,14 +68,26 @@
  *
  * \sa min()
  */
-EIGEN_MAKE_CWISE_BINARY_OP(max,max)
+template<int NaNPropagation=PropagateFast, typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar,NaNPropagation>, const Derived, const OtherDerived>
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+max
+#else
+(max)
+#endif
+(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar,NaNPropagation>, const Derived, const OtherDerived>(derived(), other.derived());
+}
 
 /** \returns an expression of the coefficient-wise max of \c *this and scalar \a other
  *
  * \sa min()
  */
+template<int NaNPropagation=PropagateFast>
 EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived,
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar,NaNPropagation>, const Derived,
                                         const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
 #ifdef EIGEN_PARSED_BY_DOXYGEN
 max
@@ -72,7 +96,7 @@ max
 #endif
 (const Scalar &other) const
 {
-  return (max)(Derived::PlainObject::Constant(rows(), cols(), other));
+  return (max)<NaNPropagation>(Derived::PlainObject::Constant(rows(), cols(), other));
 }
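// ---------------------------------------------------------------------------
// [editor's aside] A hedged usage sketch of the NaN-propagation template
// parameter introduced for min/max above (public API of Eigen 3.4-era
// releases; the function name is illustrative):
#include <Eigen/Dense>
#include <limits>

inline void nan_propagation_demo() {
  Eigen::ArrayXf a(2), b(2);
  a << 1.0f, std::numeric_limits<float>::quiet_NaN();
  b << 2.0f, 0.0f;
  Eigen::ArrayXf fast = a.min(b);                           // NaN handling unspecified (PropagateFast)
  Eigen::ArrayXf nans = a.min<Eigen::PropagateNaN>(b);      // NaN wins: {1, NaN}
  Eigen::ArrayXf nums = a.min<Eigen::PropagateNumbers>(b);  // numbers win: {1, 0}
  (void)fast; (void)nans; (void)nums;
}
// ---------------------------------------------------------------------------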
 
 /** \returns an expression of the coefficient-wise absdiff of \c *this and \a other
@@ -110,24 +134,12 @@ absolute_difference
  */
 EIGEN_MAKE_CWISE_BINARY_OP(pow,pow)
 
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow)
-#else
-/** \returns an expression of the coefficients of \c *this rasied to the constant power \a exponent
+/** \returns an expression of the coefficient-wise atan2(\c *this, \a y), where \a y is the given array argument.
  *
- * \tparam T is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression.
+ * This function computes the coefficient-wise atan2.
  *
- * This function computes the coefficient-wise power. The function MatrixBase::pow() in the
- * unsupported module MatrixFunctions computes the matrix power.
- *
- * Example: \include Cwise_pow.cpp
- * Output: \verbinclude Cwise_pow.out
- *
- * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log()
  */
-template<typename T>
-const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
-#endif
+EIGEN_MAKE_CWISE_BINARY_OP(atan2,atan2)
 
 // TODO code generating macros could be moved to Macros.h and could include generation of documentation
diff --git a/libs/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/libs/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index 13c55f4..d8c1a84 100644
--- a/libs/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/libs/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -24,11 +24,9 @@
 typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
 typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
 typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;
 typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
-#if EIGEN_HAS_CXX11_MATH
 typedef CwiseUnaryOp<internal::scalar_atanh_op<Scalar>, const Derived> AtanhReturnType;
 typedef CwiseUnaryOp<internal::scalar_asinh_op<Scalar>, const Derived> AsinhReturnType;
 typedef CwiseUnaryOp<internal::scalar_acosh_op<Scalar>, const Derived> AcoshReturnType;
-#endif
 typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
 typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
 typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
@@ -355,7 +353,6 @@ cosh() const
   return CoshReturnType(derived());
 }
 
-#if EIGEN_HAS_CXX11_MATH
 /** \returns an expression of the coefficient-wise inverse hyperbolic tan of *this.
  *
  * \sa Math functions, atanh(), asinh(), acosh()
@@ -388,7 +385,6 @@ acosh() const
 {
   return AcoshReturnType(derived());
 }
-#endif
 
 /** \returns an expression of the coefficient-wise logistic of *this.
  */
@@ -694,3 +690,32 @@ ndtri() const
 {
   return NdtriReturnType(derived());
 }
+
+template <typename ScalarExponent>
+using UnaryPowReturnType =
+    std::enable_if_t<internal::is_arithmetic<typename NumTraits<ScalarExponent>::Real>::value,
+                     CwiseUnaryOp<internal::scalar_unary_pow_op<Scalar, ScalarExponent>, const Derived>>;
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template <typename ScalarExponent>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryPowReturnType<ScalarExponent> pow(
+    const ScalarExponent& exponent) const {
+  return UnaryPowReturnType<ScalarExponent>(derived(), internal::scalar_unary_pow_op<Scalar, ScalarExponent>(exponent));
+}
+#else
+/** \returns an expression of the coefficients of \c *this raised to the constant power \a exponent
+ *
+ * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression.
+ *
+ * This function computes the coefficient-wise power. The function MatrixBase::pow() in the
+ * unsupported module MatrixFunctions computes the matrix power.
+ *
+ * Example: \include Cwise_pow.cpp
+ * Output: \verbinclude Cwise_pow.out
+ *
+ * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log()
+ */
+template <typename ScalarExponent>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryPowReturnType<ScalarExponent> pow(
+    const ScalarExponent& exponent) const;
+#endif
diff --git a/libs/eigen/Eigen/src/plugins/BlockMethods.h b/libs/eigen/Eigen/src/plugins/BlockMethods.h
index 63a52a6..68b9413 100644
--- a/libs/eigen/Eigen/src/plugins/BlockMethods.h
+++ b/libs/eigen/Eigen/src/plugins/BlockMethods.h
@@ -1418,19 +1418,19 @@ innerVectors(Index outerStart, Index outerSize) const
  */
 template<DirectionType Direction>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename internal::conditional<Direction==Vertical,ColXpr,RowXpr>::type
+std::conditional_t<Direction==Vertical,ColXpr,RowXpr>
 subVector(Index i)
 {
-  return typename internal::conditional<Direction==Vertical,ColXpr,RowXpr>::type(derived(),i);
+  return std::conditional_t<Direction==Vertical,ColXpr,RowXpr>(derived(),i);
 }
 
 /** This is the const version of subVector(Index) */
 template<DirectionType Direction>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename internal::conditional<Direction==Vertical,ConstColXpr,ConstRowXpr>::type
+std::conditional_t<Direction==Vertical,ConstColXpr,ConstRowXpr>
 subVector(Index i) const
 {
-  return typename internal::conditional<Direction==Vertical,ConstColXpr,ConstRowXpr>::type(derived(),i);
+  return std::conditional_t<Direction==Vertical,ConstColXpr,ConstRowXpr>(derived(),i);
 }
 
 /** \returns the number of subvectors (rows or columns) in the direction \c Direction
diff --git a/libs/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h b/libs/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h
index 8b6730e..2f50329 100644
--- a/libs/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h
+++ b/libs/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h
@@ -12,7 +12,7 @@
 
 /** \returns an expression of the difference of \c *this and \a other
  *
- * \note If you want to substract a given scalar from all coefficients, see Cwise::operator-().
+ * \note If you want to subtract a given scalar from all coefficients, see Cwise::operator-().
  *
  * \sa class CwiseBinaryOp, operator-=()
  */
diff --git a/libs/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h b/libs/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h
index 5418dc4..390759c 100644
--- a/libs/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h
+++ b/libs/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h
@@ -13,20 +13,20 @@
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 
 /** \internal the return type of conjugate() */
-typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
-                                       const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
-                                       const Derived&
-                                      >::type ConjugateReturnType;
+typedef std::conditional_t<NumTraits<Scalar>::IsComplex,
+                           const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
+                           const Derived&
+                          > ConjugateReturnType;
 /** \internal the return type of real() const */
-typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
-                                       const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
-                                       const Derived&
-                                      >::type RealReturnType;
+typedef std::conditional_t<NumTraits<Scalar>::IsComplex,
+                           const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
+                           const Derived&
+                          > RealReturnType;
 /** \internal the return type of real() */
-typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
-                                       CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
-                                       Derived&
-                                      >::type NonConstRealReturnType;
+typedef std::conditional_t<NumTraits<Scalar>::IsComplex,
+                           CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
+                           Derived&
+                          > NonConstRealReturnType;
 /** \internal the return type of imag() const */
 typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
 /** \internal the return type of imag() */
@@ -83,10 +83,10 @@ EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
 /// \sa conjugate()
 template<bool Cond>
 EIGEN_DEVICE_FUNC
-inline typename internal::conditional<Cond,ConjugateReturnType,const Derived&>::type
+inline std::conditional_t<Cond,ConjugateReturnType,const Derived&>
 conjugateIf() const
 {
-  typedef typename internal::conditional<Cond,ConjugateReturnType,const Derived&>::type ReturnType;
+  typedef std::conditional_t<Cond,ConjugateReturnType,const Derived&> ReturnType;
   return ReturnType(derived());
 }
 
diff --git a/libs/eigen/Eigen/src/plugins/IndexedViewMethods.h
b/libs/eigen/Eigen/src/plugins/IndexedViewMethods.h index 5bfb19a..011fcbe 100644 --- a/libs/eigen/Eigen/src/plugins/IndexedViewMethods.h +++ b/libs/eigen/Eigen/src/plugins/IndexedViewMethods.h @@ -67,9 +67,9 @@ struct EIGEN_INDEXED_VIEW_METHOD_TYPE { // This is the generic version template -typename internal::enable_if::value +std::enable_if_t::value && internal::traits::type>::ReturnAsIndexedView, - typename EIGEN_INDEXED_VIEW_METHOD_TYPE::type >::type + typename EIGEN_INDEXED_VIEW_METHOD_TYPE::type> operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST { return typename EIGEN_INDEXED_VIEW_METHOD_TYPE::type @@ -79,9 +79,9 @@ operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_IND // The following overload returns a Block<> object template -typename internal::enable_if::value +std::enable_if_t::value && internal::traits::type>::ReturnAsBlock, - typename internal::traits::type>::BlockType>::type + typename internal::traits::type>::BlockType> operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST { typedef typename internal::traits::type>::BlockType BlockType; @@ -90,23 +90,21 @@ operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_IND return BlockType(derived(), internal::first(actualRowIndices), internal::first(actualColIndices), - internal::size(actualRowIndices), - internal::size(actualColIndices)); + internal::index_list_size(actualRowIndices), + internal::index_list_size(actualColIndices)); } // The following overload returns a Scalar template -typename internal::enable_if::value +std::enable_if_t::value && internal::traits::type>::ReturnAsScalar, - CoeffReturnType >::type + CoeffReturnType > operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST { return Base::operator()(internal::eval_expr_given_size(rowIndices,rows()),internal::eval_expr_given_size(colIndices,cols())); } -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - // The following three overloads are needed to handle raw Index[N] arrays. 
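// ---------------------------------------------------------------------------
// [editor's aside] A hedged sketch of what the raw-array overloads that follow
// enable at call sites (Eigen 3.4-style indexed views; the function name is
// illustrative):
#include <Eigen/Dense>
#include <iostream>

inline void raw_array_indexing_demo() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  int rows[] = {0, 2};
  int cols[] = {1, 3};
  // Selects the 2x2 submatrix at the intersections of rows {0,2} and columns {1,3}.
  std::cout << A(rows, cols) << "\n";
}
// ---------------------------------------------------------------------------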
template @@ -133,14 +131,13 @@ operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndicesT (&col (derived(), rowIndices, colIndices); } -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE // Overloads for 1D vectors/arrays template -typename internal::enable_if< +std::enable_if_t< IsRowMajor && (!(internal::get_compile_time_incr::type>::value==1 || internal::is_valid_index_type::value)), - IndexedView::type> >::type + IndexedView::type> > operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -149,9 +146,9 @@ operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST } template -typename internal::enable_if< +std::enable_if_t< (!IsRowMajor) && (!(internal::get_compile_time_incr::type>::value==1 || internal::is_valid_index_type::value)), - IndexedView::type,IvcIndex> >::type + IndexedView::type,IvcIndex> > operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -160,29 +157,27 @@ operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST } template -typename internal::enable_if< +std::enable_if_t< (internal::get_compile_time_incr::type>::value==1) && (!internal::is_valid_index_type::value) && (!symbolic::is_symbolic::value), - VectorBlock::value> >::type + VectorBlock::value> > operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) typename IvcType::type actualIndices = ivcSize(indices); return VectorBlock::value> - (derived(), internal::first(actualIndices), internal::size(actualIndices)); + (derived(), internal::first(actualIndices), internal::index_list_size(actualIndices)); } template -typename internal::enable_if::value, CoeffReturnType >::type +std::enable_if_t::value, CoeffReturnType > operator()(const IndexType& id) EIGEN_INDEXED_VIEW_METHOD_CONST { return Base::operator()(internal::eval_expr_given_size(id,size())); } -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - template -typename internal::enable_if >::type +std::enable_if_t > operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -191,8 +186,8 @@ operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST } template -typename internal::enable_if >::type +std::enable_if_t > operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -200,8 +195,6 @@ operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST (derived(), indices, IvcIndex(0)); } -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE - #undef EIGEN_INDEXED_VIEW_METHOD_CONST #undef EIGEN_INDEXED_VIEW_METHOD_TYPE @@ -218,7 +211,7 @@ operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST * * Each parameter must either be: * - An integer indexing a single row or column - * - Eigen::all indexing the full set of respective rows or columns in increasing order + * - Eigen::placeholders::all indexing the full set of respective rows or columns in increasing order * - An ArithmeticSequence as returned by the Eigen::seq and Eigen::seqN functions * - Any %Eigen's vector/array of integers or expressions * - Plain C arrays: \c int[N] @@ -235,7 +228,7 @@ operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST * method will returns a Block object after extraction of the relevant information from the passed arguments. 
This is the case * when all arguments are either: * - An integer - * - Eigen::all + * - Eigen::placeholders::all * - An ArithmeticSequence with compile-time increment strictly equal to 1, as returned by Eigen::seq(a,b), and Eigen::seqN(a,N). * * Otherwise a more general IndexedView object will be returned, after conversion of the inputs diff --git a/libs/eigen/Eigen/src/plugins/InternalHeaderCheck.h b/libs/eigen/Eigen/src/plugins/InternalHeaderCheck.h new file mode 100644 index 0000000..ac6821d --- /dev/null +++ b/libs/eigen/Eigen/src/plugins/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CORE_MODULE_H +#error "Please include Eigen/plugins instead of including headers inside the src directory directly." +#endif diff --git a/libs/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/libs/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h index a0feef8..46fe08c 100644 --- a/libs/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +++ b/libs/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h @@ -72,23 +72,24 @@ cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * * \sa class CwiseBinaryOp, max() */ -template +template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of *this and scalar \a other * * \sa class CwiseBinaryOp, min() */ +template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMin(const Scalar &other) const { - return cwiseMin(Derived::Constant(rows(), cols(), other)); + return cwiseMin(Derived::Constant(rows(), cols(), other)); } /** \returns an expression of the coefficient-wise max of *this and \a other @@ -98,23 +99,24 @@ cwiseMin(const Scalar &other) const * * \sa class CwiseBinaryOp, min() */ -template +template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise max of *this and scalar \a other * * \sa class CwiseBinaryOp, min() */ +template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMax(const Scalar &other) const { - return cwiseMax(Derived::Constant(rows(), cols(), other)); + return cwiseMax(Derived::Constant(rows(), cols(), other)); } diff --git a/libs/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/libs/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h index 0514d8f..98d925d 100644 --- a/libs/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +++ b/libs/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h @@ -93,3 +93,13 @@ EIGEN_DOC_UNARY_ADDONS(cwiseArg,arg) EIGEN_DEVICE_FUNC inline const CwiseArgReturnType cwiseArg() const { return CwiseArgReturnType(derived()); } + +template 
+using CwisePowReturnType = + std::enable_if_t::Real>::value, + CwiseUnaryOp, const Derived>>; + +template +EIGEN_DEVICE_FUNC inline const CwisePowReturnType cwisePow(const ScalarExponent& exponent) const { + return CwisePowReturnType(derived(), internal::scalar_unary_pow_op(exponent)); +} diff --git a/libs/eigen/Eigen/src/plugins/ReshapedMethods.h b/libs/eigen/Eigen/src/plugins/ReshapedMethods.h index 482a6b0..2cb1cf6 100644 --- a/libs/eigen/Eigen/src/plugins/ReshapedMethods.h +++ b/libs/eigen/Eigen/src/plugins/ReshapedMethods.h @@ -105,13 +105,13 @@ EIGEN_DEVICE_FUNC inline Reshaped::value, internal::get_compiletime_reshape_size::value, - internal::get_compiletime_reshape_order::value> + internal::get_compiletime_reshape_order(Flags, Order)> reshaped(NRowsType nRows, NColsType nCols) EIGEN_RESHAPED_METHOD_CONST { return Reshaped::value, internal::get_compiletime_reshape_size::value, - internal::get_compiletime_reshape_order::value> + internal::get_compiletime_reshape_order(Flags, Order)> (derived(), internal::get_runtime_reshape_size(nRows,internal::get_runtime_value(nCols),size()), internal::get_runtime_reshape_size(nCols,internal::get_runtime_value(nRows),size())); @@ -129,12 +129,12 @@ reshaped() EIGEN_RESHAPED_METHOD_CONST template EIGEN_DEVICE_FUNC inline Reshaped::value> + internal::get_compiletime_reshape_order(Flags, Order)> reshaped() EIGEN_RESHAPED_METHOD_CONST { EIGEN_STATIC_ASSERT(Order==RowMajor || Order==ColMajor || Order==AutoOrder, INVALID_TEMPLATE_PARAMETER); return Reshaped::value> + internal::get_compiletime_reshape_order(Flags, Order)> (derived(), size(), 1); } diff --git a/libs/eigen/bench/btl/generic_bench/timers/portable_timer.hh b/libs/eigen/bench/btl/generic_bench/timers/portable_timer.hh old mode 100755 new mode 100644 diff --git a/libs/eigen/bench/btl/libs/STL/STL_interface.hh b/libs/eigen/bench/btl/libs/STL/STL_interface.hh index 16658c4..5b391c6 100644 --- a/libs/eigen/bench/btl/libs/STL/STL_interface.hh +++ b/libs/eigen/bench/btl/libs/STL/STL_interface.hh @@ -84,9 +84,12 @@ public : for (int j=0;j=j) + { for (int k=0;k cpqr(A.rows(),A.cols()); CompleteOrthogonalDecomposition cod(A.rows(),A.cols()); FullPivHouseholderQR fpqr(A.rows(),A.cols()); - JacobiSVD jsvd(A.rows(),A.cols()); - BDCSVD bdcsvd(A.rows(),A.cols()); + JacobiSVD jsvd(A.rows(),A.cols()); + BDCSVD bdcsvd(A.rows(),A.cols()); BENCH(t_llt, tries, rep, compute_norm_equation(llt,A)); BENCH(t_ldlt, tries, rep, compute_norm_equation(ldlt,A)); @@ -67,9 +65,9 @@ void bench(int id, int rows, int size = Size) if(size*rows<=10000000) BENCH(t_fpqr, tries, rep, compute(fpqr,A)); if(size<500) // JacobiSVD is really too slow for too large matrices - BENCH(t_jsvd, tries, rep, jsvd.compute(A,svd_opt)); + BENCH(t_jsvd, tries, rep, jsvd.compute(A)); // if(size*rows<=20000000) - BENCH(t_bdcsvd, tries, rep, bdcsvd.compute(A,svd_opt)); + BENCH(t_bdcsvd, tries, rep, bdcsvd.compute(A)); results["LLT"][id] = t_llt.best(); results["LDLT"][id] = t_ldlt.best(); diff --git a/libs/eigen/bench/spbench/CMakeLists.txt b/libs/eigen/bench/spbench/CMakeLists.txt index b186004..75c36b0 100644 --- a/libs/eigen/bench/spbench/CMakeLists.txt +++ b/libs/eigen/bench/spbench/CMakeLists.txt @@ -1,7 +1,7 @@ -set(BLAS_FOUND TRUE) -set(LAPACK_FOUND TRUE) +set(BLAS_FOUND EIGEN_BUILD_BLAS) +set(LAPACK_FOUND EIGEN_BUILD_LAPACK) set(BLAS_LIBRARIES eigen_blas_static) set(LAPACK_LIBRARIES eigen_lapack_static) diff --git a/libs/eigen/bench/tensors/tensor_benchmarks.h b/libs/eigen/bench/tensors/tensor_benchmarks.h index 0825e15..1a7a0fe 100644 --- 
a/libs/eigen/bench/tensors/tensor_benchmarks.h +++ b/libs/eigen/bench/tensors/tensor_benchmarks.h @@ -219,14 +219,8 @@ template class BenchmarkSuite { size_b[1] = m_; TensorMap, Eigen::Aligned> B(b_, size_b); -#if defined(EIGEN_HAS_INDEX_LIST) Eigen::IndexPairList, Eigen::type2indexpair<2, 1> > paddings; -#else - Eigen::array, 2> paddings; - paddings[0] = Eigen::IndexPair(0, 0); - paddings[1] = Eigen::IndexPair(2, 1); -#endif #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) { B.device(device_) = A.pad(paddings); @@ -251,15 +245,7 @@ template class BenchmarkSuite { size_b[1] = k_/2; TensorMap, Eigen::Aligned> B(b_, size_b); -#ifndef EIGEN_HAS_INDEX_LIST - Eigen::array strides; - strides[0] = 1; - strides[1] = 2; -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. Eigen::IndexList, Eigen::type2index<2> > strides; -#endif #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) { @@ -284,17 +270,8 @@ template class BenchmarkSuite { size_c[0] = m_; size_c[1] = n_; TensorMap, Eigen::Aligned> C(c_, size_c); - -#ifndef EIGEN_HAS_INDEX_LIST - Eigen::array broadcast; - broadcast[0] = 1; - broadcast[1] = n_; -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. Eigen::IndexList, int> broadcast; broadcast.set(1, n_); -#endif #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) { @@ -385,15 +362,7 @@ for (int iter = 0; iter < 10; ++iter) { Eigen::array output_size; output_size[0] = n_; TensorMap, Eigen::Aligned> C(c_, output_size); - -#ifndef EIGEN_HAS_INDEX_LIST - Eigen::array sum_along_dim; - sum_along_dim[0] = 0; -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. Eigen::IndexList> sum_along_dim; -#endif #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) { C.device(device_) = B.sum(sum_along_dim); @@ -564,9 +533,9 @@ for (int iter = 0; iter < 10; ++iter) { // Initialize the content of the memory pools to prevent asan from // complaining. - device_.memset(a_, 12, m_ * k_ * sizeof(T)); - device_.memset(b_, 23, k_ * n_ * sizeof(T)); - device_.memset(c_, 31, m_ * n_ * sizeof(T)); + device_.fill(a_, a_ + m_ * k_, T(12)); + device_.fill(b_, b_ + k_ * n_, T(23)); + device_.fill(c_, c_ + m_ * n_, T(31)); } diff --git a/libs/eigen/bench/tensors/tensor_contract_sycl_bench.cc b/libs/eigen/bench/tensors/tensor_contract_sycl_bench.cc index 8f2defe..c2d098e 100644 --- a/libs/eigen/bench/tensors/tensor_contract_sycl_bench.cc +++ b/libs/eigen/bench/tensors/tensor_contract_sycl_bench.cc @@ -56,9 +56,9 @@ void contraction(const Device& device_, TensorIndex num_iters, TensorIndex m_, T // Initialize the content of the memory pools to prevent asan from // complaining. 
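// ---------------------------------------------------------------------------
// [editor's aside] Why memset is being replaced by fill in these benchmarks:
// memset writes a repeated byte pattern, whereas fill writes typed values.
// A standalone analogue using only the C++ standard library:
#include <algorithm>
#include <cstring>
#include <vector>

inline void memset_vs_fill_demo() {
  std::vector<float> buf(4);
  // Sets every *byte* to 0x0C, so buf[0] becomes a meaningless small float, not 12.0f.
  std::memset(buf.data(), 12, buf.size() * sizeof(float));
  // Sets every *element* to 12.0f, which is what the benchmarks actually intend.
  std::fill(buf.begin(), buf.end(), 12.0f);
}
// ---------------------------------------------------------------------------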
- device_.memset(a_, 12, m_ * k_ * sizeof(T)); - device_.memset(b_, 23, k_ * n_ * sizeof(T)); - device_.memset(c_, 31, m_ * n_ * sizeof(T)); + device_.fill(a_, m_ * k_, T(12)); + device_.fill(b_, k_ * n_, T(23)); + device_.fill(c_, m_ * n_, T(31)); Eigen::array sizeA; sizeA[0] = m_; diff --git a/libs/eigen/blas/BandTriangularSolver.h b/libs/eigen/blas/BandTriangularSolver.h index ce2d74d..e8551cd 100644 --- a/libs/eigen/blas/BandTriangularSolver.h +++ b/libs/eigen/blas/BandTriangularSolver.h @@ -29,11 +29,9 @@ struct band_solve_triangular_selector(lhsStride)); RhsMap other(_other,size,1); - typename internal::conditional< - ConjLhs, - const CwiseUnaryOp,LhsMap>, - const LhsMap&> - ::type cjLhs(lhs); + std::conditional_t,LhsMap>, + const LhsMap&> cjLhs(lhs); for(int col=0 ; col(lhsStride)); RhsMap other(_other,size,1); - typename internal::conditional< - ConjLhs, - const CwiseUnaryOp,LhsMap>, - const LhsMap&> - ::type cjLhs(lhs); + std::conditional_t,LhsMap>, + const LhsMap&> cjLhs(lhs); for(int col=0 ; col::type ConjLhsType; typedef Map > ResMap; - for (Index i=0; i0)) - ResMap(res+(IsLower ? s+i : 0),r) += alpha * cj(rhs[i]) * ConjLhsType(LhsMap(lhs+s,r)); - if (HasUnitDiag) - res[i] += alpha * cj(rhs[i]); - lhs += IsLower ? size-i: i+1; + for (Index i = 0; i < size; ++i) { + Index s = IsLower && (HasUnitDiag || HasZeroDiag) ? 1 : 0; + Index r = IsLower ? size - i : i + 1; + if (!(HasUnitDiag || HasZeroDiag) || (--r > 0)) { + ResMap(res + (IsLower ? s + i : 0), r) += alpha * cj(rhs[i]) * ConjLhsType(LhsMap(lhs + s, r)); + } + if (HasUnitDiag) { + res[i] += alpha * cj(rhs[i]); + } + lhs += IsLower ? size - i : i + 1; } }; }; @@ -61,15 +62,16 @@ struct packed_triangular_matrix_vector_product > RhsMap; typedef typename conj_expr_if::type ConjRhsType; - for (Index i=0; i0)) - res[i] += alpha * (ConjLhsType(LhsMap(lhs+s,r)).cwiseProduct(ConjRhsType(RhsMap(rhs+(IsLower ? 0 : s+i),r)))).sum(); - if (HasUnitDiag) - res[i] += alpha * cj(rhs[i]); - lhs += IsLower ? i+1 : size-i; + for (Index i = 0; i < size; ++i) { + Index s = !IsLower && (HasUnitDiag || HasZeroDiag) ? 1 : 0; + Index r = IsLower ? i + 1 : size - i; + if (!(HasUnitDiag || HasZeroDiag) || (--r > 0)) { + res[i] += alpha * (ConjLhsType(LhsMap(lhs + s, r)).cwiseProduct(ConjRhsType(RhsMap(rhs + (IsLower ? 0 : s + i), r)))).sum(); + } + if (HasUnitDiag) { + res[i] += alpha * cj(rhs[i]); + } + lhs += IsLower ? 
i + 1 : size - i; } }; }; diff --git a/libs/eigen/blas/common.h b/libs/eigen/blas/common.h index a9b6978..a938cb1 100644 --- a/libs/eigen/blas/common.h +++ b/libs/eigen/blas/common.h @@ -149,7 +149,7 @@ T* get_compact_vector(T* x, int n, int incx) if(incx==1) return x; - typename Eigen::internal::remove_const::type* ret = new Scalar[n]; + std::remove_const_t* ret = new Scalar[n]; if(incx<0) make_vector(ret,n) = make_vector(x,n,-incx).reverse(); else make_vector(ret,n) = make_vector(x,n, incx); return ret; diff --git a/libs/eigen/blas/level1_cplx_impl.h b/libs/eigen/blas/level1_cplx_impl.h index 6c7edd7..aa46784 100644 --- a/libs/eigen/blas/level1_cplx_impl.h +++ b/libs/eigen/blas/level1_cplx_impl.h @@ -11,7 +11,6 @@ struct scalar_norm1_op { typedef RealScalar result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_norm1_op) inline RealScalar operator() (const Scalar& a) const { return numext::norm1(a); } }; namespace Eigen { diff --git a/libs/eigen/blas/level3_impl.h b/libs/eigen/blas/level3_impl.h index 6dd6338..66216c9 100644 --- a/libs/eigen/blas/level3_impl.h +++ b/libs/eigen/blas/level3_impl.h @@ -362,18 +362,18 @@ int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, const Scalar&, internal::level3_blocking&); static const functype func[8] = { // array index: NOTR | (UP << 2) - (internal::general_matrix_matrix_triangular_product::run), + (internal::general_matrix_matrix_triangular_product::run), // array index: TR | (UP << 2) - (internal::general_matrix_matrix_triangular_product::run), + (internal::general_matrix_matrix_triangular_product::run), // array index: ADJ | (UP << 2) - (internal::general_matrix_matrix_triangular_product::run), + (internal::general_matrix_matrix_triangular_product::run), 0, // array index: NOTR | (LO << 2) - (internal::general_matrix_matrix_triangular_product::run), + (internal::general_matrix_matrix_triangular_product::run), // array index: TR | (LO << 2) - (internal::general_matrix_matrix_triangular_product::run), + (internal::general_matrix_matrix_triangular_product::run), // array index: ADJ | (LO << 2) - (internal::general_matrix_matrix_triangular_product::run), + (internal::general_matrix_matrix_triangular_product::run), 0 }; #endif diff --git a/libs/eigen/ci/README.md b/libs/eigen/ci/README.md index 8395b16..6a63eef 100644 --- a/libs/eigen/ci/README.md +++ b/libs/eigen/ci/README.md @@ -9,48 +9,30 @@ The build stage consists of the following jobs: | Job Name | Arch | OS | Compiler | C++11 | |------------------------------------------|-----------|----------------|------------|---------| -| `build:x86-64:linux:gcc-4.8:cxx11-off` | `x86-64` | `Ubuntu 18.04` | `GCC-4.8` | `Off` | | `build:x86-64:linux:gcc-4.8:cxx11-on` | `x86-64` | `Ubuntu 18.04` | `GCC-4.8` | `On` | -| `build:x86-64:linux:gcc-9:cxx11-off` | `x86-64` | `Ubuntu 18.04` | `GCC-9` | `Off` | | `build:x86-64:linux:gcc-9:cxx11-on` | `x86-64` | `Ubuntu 18.04` | `GCC-9` | `On` | -| `build:x86-64:linux:gcc-10:cxx11-off` | `x86-64` | `Ubuntu 18.04` | `GCC-10` | `Off` | | `build:x86-64:linux:gcc-10:cxx11-on` | `x86-64` | `Ubuntu 18.04` | `GCC-10` | `On` | -| `build:x86-64:linux:clang-10:cxx11-off` | `x86-64` | `Ubuntu 18.04` | `Clang-10` | `Off` | | `build:x86-64:linux:clang-10:cxx11-on` | `x86-64` | `Ubuntu 18.04` | `Clang-10` | `On` | -| `build:aarch64:linux:gcc-10:cxx11-off` | `AArch64` | `Ubuntu 18.04` | `GCC-10` | `Off` | | 
`build:aarch64:linux:gcc-10:cxx11-on` | `AArch64` | `Ubuntu 18.04` | `GCC-10` | `On` | -| `build:aarch64:linux:clang-10:cxx11-off` | `AArch64` | `Ubuntu 18.04` | `Clang-10` | `Off` | | `build:aarch64:linux:clang-10:cxx11-on` | `AArch64` | `Ubuntu 18.04` | `Clang-10` | `On` | ### Test stage In principle every build-job has a corresponding test-job, however testing supported and unsupported modules is divided into separate jobs. The test jobs in detail: -### Job dependecies +### Job dependencies | Job Name | Arch | OS | Compiler | C++11 | Module |-----------------------------------------------------|-----------|----------------|------------|---------|-------- -| `test:x86-64:linux:gcc-4.8:cxx11-off:official` | `x86-64` | `Ubuntu 18.04` | `GCC-4.8` | `Off` | `Official` -| `test:x86-64:linux:gcc-4.8:cxx11-off:unsupported` | `x86-64` | `Ubuntu 18.04` | `GCC-4.8` | `Off` | `Unsupported` | `test:x86-64:linux:gcc-4.8:cxx11-on:official` | `x86-64` | `Ubuntu 18.04` | `GCC-4.8` | `On` | `Official` | `test:x86-64:linux:gcc-4.8:cxx11-on:unsupported` | `x86-64` | `Ubuntu 18.04` | `GCC-4.8` | `On` | `Unsupported` -| `test:x86-64:linux:gcc-9:cxx11-off:official` | `x86-64` | `Ubuntu 18.04` | `GCC-9` | `Off` | `Official` -| `test:x86-64:linux:gcc-9:cxx11-off:unsupported` | `x86-64` | `Ubuntu 18.04` | `GCC-9` | `Off` | `Unsupported` | `test:x86-64:linux:gcc-9:cxx11-on:official` | `x86-64` | `Ubuntu 18.04` | `GCC-9` | `On` | `Official` | `test:x86-64:linux:gcc-9:cxx11-on:unsupported` | `x86-64` | `Ubuntu 18.04` | `GCC-9` | `On` | `Unsupported` -| `test:x86-64:linux:gcc-10:cxx11-off:official` | `x86-64` | `Ubuntu 18.04` | `GCC-10` | `Off` | `Official` -| `test:x86-64:linux:gcc-10:cxx11-off:unsupported` | `x86-64` | `Ubuntu 18.04` | `GCC-10` | `Off` | `Unsupported` | `test:x86-64:linux:gcc-10:cxx11-on:official` | `x86-64` | `Ubuntu 18.04` | `GCC-10` | `On` | `Official` | `test:x86-64:linux:gcc-10:cxx11-on:unsupported` | `x86-64` | `Ubuntu 18.04` | `GCC-10` | `On` | `Unsupported` -| `test:x86-64:linux:clang-10:cxx11-off:official` | `x86-64` | `Ubuntu 18.04` | `Clang-10` | `Off` | `Official` -| `test:x86-64:linux:clang-10:cxx11-off:unsupported` | `x86-64` | `Ubuntu 18.04` | `Clang-10` | `Off` | `Unsupported` | `test:x86-64:linux:clang-10:cxx11-on:official` | `x86-64` | `Ubuntu 18.04` | `Clang-10` | `On` | `Official` | `test:x86-64:linux:clang-10:cxx11-on:unsupported` | `x86-64` | `Ubuntu 18.04` | `Clang-10` | `On` | `Unsupported` -| `test:aarch64:linux:gcc-10:cxx11-off:official` | `AArch64` | `Ubuntu 18.04` | `GCC-10` | `Off` | `Official` -| `test:aarch64:linux:gcc-10:cxx11-off:unsupported` | `AArch64` | `Ubuntu 18.04` | `GCC-10` | `Off` | `Unsupported` | `test:aarch64:linux:gcc-10:cxx11-on:official` | `AArch64` | `Ubuntu 18.04` | `GCC-10` | `On` | `Official` | `test:aarch64:linux:gcc-10:cxx11-on:unsupported` | `AArch64` | `Ubuntu 18.04` | `GCC-10` | `On` | `Unsupported` -| `test:aarch64:linux:clang-10:cxx11-off:official` | `AArch64` | `Ubuntu 18.04` | `Clang-10` | `Off` | `Official` -| `test:aarch64:linux:clang-10:cxx11-off:unsupported` | `AArch64` | `Ubuntu 18.04` | `Clang-10` | `Off` | `Unsupported` | `test:aarch64:linux:clang-10:cxx11-on:official` | `AArch64` | `Ubuntu 18.04` | `Clang-10` | `On` | `Official` | `test:aarch64:linux:clang-10:cxx11-on:unsupported` | `AArch64` | `Ubuntu 18.04` | `Clang-10` | `On` | `Unsupported` diff --git a/libs/eigen/ci/build.gitlab-ci.yml b/libs/eigen/ci/build.gitlab-ci.yml index 6b9f415..073212a 100644 --- a/libs/eigen/ci/build.gitlab-ci.yml +++ 
b/libs/eigen/ci/build.gitlab-ci.yml @@ -23,41 +23,19 @@ - schedules ######## x86-64 ################################################################ -# GCC-4.8 (the oldest compiler we support) -build:x86-64:linux:gcc-4.8:cxx11-off: - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "g++-4.8" - EIGEN_CI_CC_COMPILER: "gcc-4.8" - EIGEN_TEST_CXX11: "off" - tags: - - eigen-runner - - linux - - x86-64 - -build:x86-64:linux:gcc-4.8:cxx11-on: - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "g++-4.8" - EIGEN_CI_CC_COMPILER: "gcc-4.8" - EIGEN_TEST_CXX11: "on" - tags: - - eigen-runner - - linux - - x86-64 +# # GCC-4.8 (the oldest compiler we support) +# build:x86-64:linux:gcc-4.8:cxx11-on: +# extends: .build:linux:base +# variables: +# EIGEN_CI_CXX_COMPILER: "g++-4.8" +# EIGEN_CI_CC_COMPILER: "gcc-4.8" +# EIGEN_TEST_CXX11: "on" +# tags: +# - eigen-runner +# - linux +# - x86-64 # GCC-9 -build:x86-64:linux:gcc-9:cxx11-off: - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "g++-9" - EIGEN_CI_CC_COMPILER: "gcc-9" - EIGEN_TEST_CXX11: "off" - tags: - - eigen-runner - - linux - - x86-64 - build:x86-64:linux:gcc-9:cxx11-on: extends: .build:linux:base variables: @@ -70,17 +48,6 @@ build:x86-64:linux:gcc-9:cxx11-on: - x86-64 # GCC-10 -build:x86-64:linux:gcc-10:cxx11-off: - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "g++-10" - EIGEN_CI_CC_COMPILER: "gcc-10" - EIGEN_TEST_CXX11: "off" - tags: - - eigen-runner - - linux - - x86-64 - build:x86-64:linux:gcc-10:cxx11-on: extends: .build:linux:base variables: @@ -93,17 +60,6 @@ build:x86-64:linux:gcc-10:cxx11-on: - x86-64 # Clang-10 -build:x86-64:linux:clang-10:cxx11-off: - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "clang++-10" - EIGEN_CI_CC_COMPILER: "clang-10" - EIGEN_TEST_CXX11: "off" - tags: - - eigen-runner - - linux - - x86-64 - build:x86-64:linux:clang-10:cxx11-on: extends: .build:linux:base variables: @@ -115,19 +71,22 @@ build:x86-64:linux:clang-10:cxx11-on: - linux - x86-64 -######## AArch64 ############################################################### -# GCC-10 -build:aarch64:linux:gcc-10:cxx11-off: +# Clang-10, AVX512 +build:x86-64:linux:clang-10:cxx11-on:avx512: extends: .build:linux:base variables: - EIGEN_CI_CXX_COMPILER: "g++-10" - EIGEN_CI_CC_COMPILER: "gcc-10" - EIGEN_TEST_CXX11: "off" + EIGEN_CI_CXX_COMPILER: "clang++-10" + EIGEN_CI_CC_COMPILER: "clang-10" + EIGEN_TEST_CXX11: "on" + EIGEN_TEST_AVX512DQ: "on" tags: - eigen-runner - linux - - aarch64 + - x86-64 + - avx512 +######## AArch64 ############################################################### +# GCC-10 build:aarch64:linux:gcc-10:cxx11-on: extends: .build:linux:base variables: @@ -139,18 +98,7 @@ build:aarch64:linux:gcc-10:cxx11-on: - linux - aarch64 -# Clang-10 -build:aarch64:linux:clang-10:cxx11-off: - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "clang++-10" - EIGEN_CI_CC_COMPILER: "clang-10" - EIGEN_TEST_CXX11: "off" - tags: - - eigen-runner - - linux - - aarch64 - +# # Clang-10 build:aarch64:linux:clang-10:cxx11-on: extends: .build:linux:base variables: @@ -166,18 +114,6 @@ build:aarch64:linux:clang-10:cxx11-on: # Currently all ppc64le jobs are allowed to fail # GCC-10 -build:ppc64le:linux:gcc-10:cxx11-off: - allow_failure: true - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "g++-10" - EIGEN_CI_CC_COMPILER: "gcc-10" - EIGEN_TEST_CXX11: "off" - tags: - - eigen-runner - - linux - - ppc64le - build:ppc64le:linux:gcc-10:cxx11-on: allow_failure: true 
extends: .build:linux:base @@ -185,24 +121,13 @@ build:ppc64le:linux:gcc-10:cxx11-on: EIGEN_CI_CXX_COMPILER: "g++-10" EIGEN_CI_CC_COMPILER: "gcc-10" EIGEN_TEST_CXX11: "on" + EIGEN_CI_ADDITIONAL_ARGS: "-DCMAKE_CXX_FLAGS='-DEIGEN_ALTIVEC_DISABLE_MMA'" tags: - eigen-runner - linux - ppc64le -# # Clang-10 -build:ppc64le:linux:clang-10:cxx11-off: - allow_failure: true - extends: .build:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "clang++-10" - EIGEN_CI_CC_COMPILER: "clang-10" - EIGEN_TEST_CXX11: "off" - tags: - - eigen-runner - - linux - - ppc64le - +# Clang-10 build:ppc64le:linux:clang-10:cxx11-on: allow_failure: true extends: .build:linux:base diff --git a/libs/eigen/ci/smoketests.gitlab-ci.yml b/libs/eigen/ci/smoketests.gitlab-ci.yml index 6384f10..c69d392 100644 --- a/libs/eigen/ci/smoketests.gitlab-ci.yml +++ b/libs/eigen/ci/smoketests.gitlab-ci.yml @@ -22,13 +22,6 @@ only: - merge_requests -buildsmoketests:x86-64:linux:gcc-10:cxx11-off: - extends: .buildsmoketests:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "g++-10" - EIGEN_CI_CC_COMPILER: "gcc-10" - EIGEN_TEST_CXX11: "off" - buildsmoketests:x86-64:linux:gcc-10:cxx11-on: extends: .buildsmoketests:linux:base variables: @@ -36,13 +29,6 @@ buildsmoketests:x86-64:linux:gcc-10:cxx11-on: EIGEN_CI_CC_COMPILER: "gcc-10" EIGEN_TEST_CXX11: "on" -buildsmoketests:x86-64:linux:clang-10:cxx11-off: - extends: .buildsmoketests:linux:base - variables: - EIGEN_CI_CXX_COMPILER: "clang++-10" - EIGEN_CI_CC_COMPILER: "clang-10" - EIGEN_TEST_CXX11: "off" - buildsmoketests:x86-64:linux:clang-10:cxx11-on: extends: .buildsmoketests:linux:base variables: @@ -61,9 +47,11 @@ buildsmoketests:x86-64:linux:clang-10:cxx11-on: - apt-get install --no-install-recommends -y ${EIGEN_CI_CXX_COMPILER} ${EIGEN_CI_CC_COMPILER} cmake ninja-build xsltproc script: + - export NPROC=`nproc` + - echo ${NPROC} - export CXX=${EIGEN_CI_CXX_COMPILER} - export CC=${EIGEN_CI_CC_COMPILER} - - cd ${BUILDDIR} && ctest --output-on-failure --no-compress-output + - cd ${BUILDDIR} && ctest -j${NPROC} --output-on-failure --no-compress-output --build-no-clean -T test -L smoketest after_script: - apt-get update -y @@ -78,13 +66,6 @@ buildsmoketests:x86-64:linux:clang-10:cxx11-on: only: - merge_requests -smoketests:x86-64:linux:gcc-10:cxx11-off: - extends: .smoketests:linux:base - variables: - EIGEN_CI_CXX_COMPILER: g++-10 - EIGEN_CI_CC_COMPILER: gcc-10 - needs: [ "buildsmoketests:x86-64:linux:gcc-10:cxx11-off" ] - smoketests:x86-64:linux:gcc-10:cxx11-on: extends: .smoketests:linux:base variables: @@ -92,13 +73,6 @@ smoketests:x86-64:linux:gcc-10:cxx11-on: EIGEN_CI_CC_COMPILER: gcc-10 needs: [ "buildsmoketests:x86-64:linux:gcc-10:cxx11-on" ] -smoketests:x86-64:linux:clang-10:cxx11-off: - extends: .smoketests:linux:base - variables: - EIGEN_CI_CXX_COMPILER: clang++-10 - EIGEN_CI_CC_COMPILER: clang-10 - needs: [ "buildsmoketests:x86-64:linux:clang-10:cxx11-off" ] - smoketests:x86-64:linux:clang-10:cxx11-on: extends: .smoketests:linux:base variables: diff --git a/libs/eigen/ci/test.gitlab-ci.yml b/libs/eigen/ci/test.gitlab-ci.yml index 2a0f5dd..63012bd 100644 --- a/libs/eigen/ci/test.gitlab-ci.yml +++ b/libs/eigen/ci/test.gitlab-ci.yml @@ -10,9 +10,11 @@ - apt-get install --no-install-recommends -y ${EIGEN_CI_CXX_COMPILER} ${EIGEN_CI_CC_COMPILER} cmake ninja-build xsltproc script: + - export NPROC=`nproc` + - echo ${NPROC} - export CXX=${EIGEN_CI_CXX_COMPILER} - export CC=${EIGEN_CI_CC_COMPILER} - - cd ${BUILDDIR} && ctest --output-on-failure --no-compress-output + - cd ${BUILDDIR} && ctest 
-j${NPROC} --output-on-failure --no-compress-output --build-no-clean -T test -L ${EIGEN_CI_TEST_LABEL} after_script: - apt-get update -y @@ -28,78 +30,36 @@ - schedules ##### x86-64 ################################################################### -# GCC-4.8 -.test:x86-64:linux:gcc-4.8:cxx11-off: - extends: .test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: g++-4.8 - EIGEN_CI_CC_COMPILER: gcc-4.8 - needs: [ "build:x86-64:linux:gcc-4.8:cxx11-off" ] - tags: - - eigen-runner - - linux - - x86-64 +# # GCC-4.8 +# .test:x86-64:linux:gcc-4.8:cxx11-on: +# extends: .test:linux:base +# variables: +# EIGEN_CI_CXX_COMPILER: g++-4.8 +# EIGEN_CI_CC_COMPILER: gcc-4.8 +# needs: [ "build:x86-64:linux:gcc-4.8:cxx11-on" ] +# tags: +# - eigen-runner +# - linux +# - x86-64 -test:x86-64:linux:gcc-4.8:cxx11-off:official: - extends: .test:x86-64:linux:gcc-4.8:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Official" +# test:x86-64:linux:gcc-4.8:cxx11-on:official: +# extends: .test:x86-64:linux:gcc-4.8:cxx11-on +# variables: +# EIGEN_CI_TEST_LABEL: "Official" -test:x86-64:linux:gcc-4.8:cxx11-off:unsupported: - extends: .test:x86-64:linux:gcc-4.8:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" - -.test:x86-64:linux:gcc-4.8:cxx11-on: - extends: .test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: g++-4.8 - EIGEN_CI_CC_COMPILER: gcc-4.8 - needs: [ "build:x86-64:linux:gcc-4.8:cxx11-on" ] - tags: - - eigen-runner - - linux - - x86-64 - -test:x86-64:linux:gcc-4.8:cxx11-on:official: - extends: .test:x86-64:linux:gcc-4.8:cxx11-on - variables: - EIGEN_CI_TEST_LABEL: "Official" - -test:x86-64:linux:gcc-4.8:cxx11-on:unsupported: - extends: .test:x86-64:linux:gcc-4.8:cxx11-on - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" +# test:x86-64:linux:gcc-4.8:cxx11-on:unsupported: +# extends: .test:x86-64:linux:gcc-4.8:cxx11-on +# variables: +# EIGEN_CI_TEST_LABEL: "Unsupported" # GCC-9 -.test:x86-64:linux:gcc-9:cxx11-off: - extends: .test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: g++-9 - EIGEN_CI_CC_COMPILER: gcc-9 - needs: [ "build:x86-64:linux:gcc-9:cxx11-off" ] - tags: - - eigen-runner - - linux - - x86-64 - -test:x86-64:linux:gcc-9:cxx11-off:official: - extends: .test:x86-64:linux:gcc-9:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Official" - -test:x86-64:linux:gcc-9:cxx11-off:unsupported: - extends: .test:x86-64:linux:gcc-9:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" - .test:x86-64:linux:gcc-9:cxx11-on: extends: .test:linux:base variables: EIGEN_CI_CXX_COMPILER: g++-9 EIGEN_CI_CC_COMPILER: gcc-9 needs: [ "build:x86-64:linux:gcc-9:cxx11-on" ] - tags: + tags: - eigen-runner - linux - x86-64 @@ -115,35 +75,13 @@ test:x86-64:linux:gcc-9:cxx11-on:unsupported: EIGEN_CI_TEST_LABEL: "Unsupported" # GCC-10 -.test:x86-64:linux:gcc-10:cxx11-off: - extends: .test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: g++-10 - EIGEN_CI_CC_COMPILER: gcc-10 - needs: [ "build:x86-64:linux:gcc-10:cxx11-off" ] - tags: - - eigen-runner - - linux - - x86-64 - -test:x86-64:linux:gcc-10:cxx11-off:official: - extends: .test:x86-64:linux:gcc-10:cxx11-off - allow_failure: true - variables: - EIGEN_CI_TEST_LABEL: "Official" - -test:x86-64:linux:gcc-10:cxx11-off:unsupported: - extends: .test:x86-64:linux:gcc-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" - .test:x86-64:linux:gcc-10:cxx11-on: extends: .test:linux:base variables: EIGEN_CI_CXX_COMPILER: g++-10 EIGEN_CI_CC_COMPILER: gcc-10 needs: [ "build:x86-64:linux:gcc-10:cxx11-on" ] - tags: + tags: - eigen-runner - linux - x86-64 @@ -161,34 +99,13 
@@ test:x86-64:linux:gcc-10:cxx11-on:unsupported: EIGEN_CI_TEST_LABEL: "Unsupported" # Clang 10 -.test:x86-64:linux:clang-10:cxx11-off: - extends: .test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: clang++-10 - EIGEN_CI_CC_COMPILER: clang-10 - needs: [ "build:x86-64:linux:clang-10:cxx11-off" ] - tags: - - eigen-runner - - linux - - x86-64 - -test:x86-64:linux:clang-10:cxx11-off:official: - extends: .test:x86-64:linux:clang-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Official" - -test:x86-64:linux:clang-10:cxx11-off:unsupported: - extends: .test:x86-64:linux:clang-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" - .test:x86-64:linux:clang-10:cxx11-on: extends: .test:linux:base variables: EIGEN_CI_CXX_COMPILER: clang++-10 EIGEN_CI_CC_COMPILER: clang-10 needs: [ "build:x86-64:linux:clang-10:cxx11-on" ] - tags: + tags: - eigen-runner - linux - x86-64 @@ -203,38 +120,37 @@ test:x86-64:linux:clang-10:cxx11-on:unsupported: variables: EIGEN_CI_TEST_LABEL: "Unsupported" -##### AArch64 ################################################################## -# GCC-10 -.test:aarch64:linux:gcc-10:cxx11-off: +.test:x86-64:linux:clang-10:cxx11-on:avx512: extends: .test:linux:base variables: - EIGEN_CI_CXX_COMPILER: g++-10 - EIGEN_CI_CC_COMPILER: gcc-10 - needs: [ "build:aarch64:linux:gcc-10:cxx11-off" ] - tags: + EIGEN_CI_CXX_COMPILER: clang++-10 + EIGEN_CI_CC_COMPILER: clang-10 + needs: [ "build:x86-64:linux:clang-10:cxx11-on:avx512" ] + tags: - eigen-runner - linux - - aarch64 + - x86-64 + - avx512 -test:aarch64:linux:gcc-10:cxx11-off:official: - extends: .test:aarch64:linux:gcc-10:cxx11-off - allow_failure: true +test:x86-64:linux:clang-10:cxx11-on:avx512:official: + extends: .test:x86-64:linux:clang-10:cxx11-on:avx512 variables: EIGEN_CI_TEST_LABEL: "Official" -test:aarch64:linux:gcc-10:cxx11-off:unsupported: - extends: .test:aarch64:linux:gcc-10:cxx11-off - allow_failure: true +test:x86-64:linux:clang-10:cxx11-on:avx512:unsupported: + extends: .test:x86-64:linux:clang-10:cxx11-on:avx512 variables: EIGEN_CI_TEST_LABEL: "Unsupported" +##### AArch64 ################################################################## +# GCC-10 .test:aarch64:linux:gcc-10:cxx11-on: extends: .test:linux:base variables: EIGEN_CI_CXX_COMPILER: g++-10 EIGEN_CI_CC_COMPILER: gcc-10 needs: [ "build:aarch64:linux:gcc-10:cxx11-on" ] - tags: + tags: - eigen-runner - linux - aarch64 @@ -252,35 +168,13 @@ test:aarch64:linux:gcc-10:cxx11-on:unsupported: EIGEN_CI_TEST_LABEL: "Unsupported" # Clang 10 -.test:aarch64:linux:clang-10:cxx11-off: - extends: .test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: clang++-10 - EIGEN_CI_CC_COMPILER: clang-10 - needs: [ "build:aarch64:linux:clang-10:cxx11-off" ] - tags: - - eigen-runner - - linux - - aarch64 - -test:aarch64:linux:clang-10:cxx11-off:official: - extends: .test:aarch64:linux:clang-10:cxx11-off - allow_failure: true - variables: - EIGEN_CI_TEST_LABEL: "Official" - -test:aarch64:linux:clang-10:cxx11-off:unsupported: - extends: .test:aarch64:linux:clang-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" - .test:aarch64:linux:clang-10:cxx11-on: extends: .test:linux:base variables: EIGEN_CI_CXX_COMPILER: clang++-10 EIGEN_CI_CC_COMPILER: clang-10 needs: [ "build:aarch64:linux:clang-10:cxx11-on" ] - tags: + tags: - eigen-runner - linux - aarch64 @@ -298,28 +192,6 @@ test:aarch64:linux:clang-10:cxx11-on:unsupported: ##### ppc64le ################################################################## # GCC-10 -.test:ppc64le:linux:gcc-10:cxx11-off: - extends: 
.test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: g++-10 - EIGEN_CI_CC_COMPILER: gcc-10 - needs: [ "build:ppc64le:linux:gcc-10:cxx11-off" ] - allow_failure: true - tags: - - eigen-runner - - linux - - ppc64le - -test:ppc64le:linux:gcc-10:cxx11-off:official: - extends: .test:ppc64le:linux:gcc-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Official" - -test:ppc64le:linux:gcc-10:cxx11-off:unsupported: - extends: .test:ppc64le:linux:gcc-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" - .test:ppc64le:linux:gcc-10:cxx11-on: extends: .test:linux:base variables: @@ -327,7 +199,7 @@ test:ppc64le:linux:gcc-10:cxx11-off:unsupported: EIGEN_CI_CC_COMPILER: gcc-10 needs: [ "build:ppc64le:linux:gcc-10:cxx11-on" ] allow_failure: true - tags: + tags: - eigen-runner - linux - ppc64le @@ -342,29 +214,7 @@ test:ppc64le:linux:gcc-10:cxx11-on:unsupported: variables: EIGEN_CI_TEST_LABEL: "Unsupported" -# # Clang 10 -.test:ppc64le:linux:clang-10:cxx11-off: - extends: .test:linux:base - variables: - EIGEN_CI_CXX_COMPILER: clang++-10 - EIGEN_CI_CC_COMPILER: clang-10 - needs: [ "build:ppc64le:linux:clang-10:cxx11-off" ] - allow_failure: true - tags: - - eigen-runner - - linux - - ppc64le - -test:ppc64le:linux:clang-10:cxx11-off:official: - extends: .test:ppc64le:linux:clang-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Official" - -test:ppc64le:linux:clang-10:cxx11-off:unsupported: - extends: .test:ppc64le:linux:clang-10:cxx11-off - variables: - EIGEN_CI_TEST_LABEL: "Unsupported" - +# Clang 10 .test:ppc64le:linux:clang-10:cxx11-on: extends: .test:linux:base variables: @@ -372,7 +222,7 @@ test:ppc64le:linux:clang-10:cxx11-off:unsupported: EIGEN_CI_CC_COMPILER: clang-10 needs: [ "build:ppc64le:linux:clang-10:cxx11-on" ] allow_failure: true - tags: + tags: - eigen-runner - linux - ppc64le diff --git a/libs/eigen/cmake/Eigen3Config.cmake.in b/libs/eigen/cmake/Eigen3Config.cmake.in index 0a1ac61..96582f5 100644 --- a/libs/eigen/cmake/Eigen3Config.cmake.in +++ b/libs/eigen/cmake/Eigen3Config.cmake.in @@ -3,21 +3,6 @@ @PACKAGE_INIT@ -if (NOT TARGET eigen) +if (NOT TARGET Eigen3::Eigen) include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake") -endif () - -# Legacy variables, do *not* use. May be removed in the future. 
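With the config file now guarding on the imported target Eigen3::Eigen instead of the old lowercase eigen target, and the legacy EIGEN3_* variables stripped out just below, downstream projects consume Eigen exclusively through the target. A minimal consumer sketch (the project and source names are placeholders, not part of this patch):

    # CMakeLists.txt of a hypothetical downstream project
    find_package(Eigen3 REQUIRED NO_MODULE)               # config-mode lookup of Eigen3Config.cmake
    add_executable(example main.cpp)
    target_link_libraries(example PRIVATE Eigen3::Eigen)  # include paths propagate; no EIGEN3_* variables needed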
- -set (EIGEN3_FOUND 1) -set (EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake") - -set (EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@") -set (EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@") -set (EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@") -set (EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@") - -set (EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@") -set (EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@") -set (EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@") -set (EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@") +endif (NOT TARGET Eigen3::Eigen) diff --git a/libs/eigen/cmake/Eigen3ConfigLegacy.cmake.in b/libs/eigen/cmake/Eigen3ConfigLegacy.cmake.in deleted file mode 100644 index 62d7224..0000000 --- a/libs/eigen/cmake/Eigen3ConfigLegacy.cmake.in +++ /dev/null @@ -1,30 +0,0 @@ -# -*- cmake -*- -# -# Eigen3Config.cmake(.in) - -# Use the following variables to compile and link against Eigen: -# EIGEN3_FOUND - True if Eigen was found on your system -# EIGEN3_USE_FILE - The file making Eigen usable -# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen -# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found -# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and it's dependencies -# EIGEN3_ROOT_DIR - The base directory of Eigen -# EIGEN3_VERSION_STRING - A human-readable string containing the version -# EIGEN3_VERSION_MAJOR - The major version of Eigen -# EIGEN3_VERSION_MINOR - The minor version of Eigen -# EIGEN3_VERSION_PATCH - The patch version of Eigen - -@PACKAGE_INIT@ - -set ( EIGEN3_FOUND 1 ) -set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" ) - -set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" ) -set ( EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@" ) -set ( EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@" ) -set ( EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@" ) - -set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" ) -set ( EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@" ) -set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" ) -set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" ) diff --git a/libs/eigen/cmake/EigenConfigureTesting.cmake b/libs/eigen/cmake/EigenConfigureTesting.cmake index 9cb3bb2..2a1e7ab 100644 --- a/libs/eigen/cmake/EigenConfigureTesting.cmake +++ b/libs/eigen/cmake/EigenConfigureTesting.cmake @@ -8,9 +8,18 @@ ei_set_sitename() ei_set_build_string() add_custom_target(buildtests) -add_custom_target(check COMMAND "ctest") +add_custom_target(check COMMAND "ctest" ${EIGEN_CTEST_ARGS}) add_dependencies(check buildtests) +# Convenience target for only building GPU tests. +add_custom_target(buildtests_gpu) +add_custom_target(check_gpu COMMAND "ctest" "--output-on-failure" + "--no-compress-output" + "--build-no-clean" + "-T" "test" + "-L" "gpu") +add_dependencies(check_gpu buildtests_gpu) + # check whether /bin/bash exists (disabled as not used anymore) # find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH) diff --git a/libs/eigen/cmake/EigenDetermineOSVersion.cmake b/libs/eigen/cmake/EigenDetermineOSVersion.cmake deleted file mode 100644 index 9246fa6..0000000 --- a/libs/eigen/cmake/EigenDetermineOSVersion.cmake +++ /dev/null @@ -1,46 +0,0 @@ -# The utility function DetermineOSVersion aims at providing an -# improved version of the CMake variable ${CMAKE_SYSTEM} on Windows -# machines. 
-# -# Usage: -# include(EigenDetermineOSVersion) -# DetermineOSVersion(OS_VERSION) -# message("OS: ${OS_VERSION}") - -# - A little helper variable which should not be directly called -function(DetermineShortWindowsName WIN_VERSION win_num_version) - if (${win_num_version} VERSION_EQUAL "6.1") - set(_version "win7") - elseif(${win_num_version} VERSION_EQUAL "6.0") - set(_version "winVista") - elseif(${win_num_version} VERSION_EQUAL "5.2") - set(_version "winXpProf") - elseif(${win_num_version} VERSION_EQUAL "5.1") - set(_version "winXp") - elseif(${win_num_version} VERSION_EQUAL "5.0") - set(_version "win2000Prof") - else() - set(_version "unknownWin") - endif() - set(${WIN_VERSION} ${_version} PARENT_SCOPE) -endfunction() - -function(DetermineOSVersion OS_VERSION) - if (WIN32 AND CMAKE_HOST_SYSTEM_NAME MATCHES Windows) - file (TO_NATIVE_PATH "$ENV{COMSPEC}" SHELL) - exec_program( ${SHELL} ARGS "/c" "ver" OUTPUT_VARIABLE ver_output) - - string(REGEX MATCHALL "[0-9]+" - ver_list "${ver_output}") - list(GET ver_list 0 _major) - list(GET ver_list 1 _minor) - - set(win_num_version ${_major}.${_minor}) - DetermineShortWindowsName(win_version "${win_num_version}") - if(win_version) - set(${OS_VERSION} ${win_version} PARENT_SCOPE) - endif() - else() - set(${OS_VERSION} ${CMAKE_SYSTEM} PARENT_SCOPE) - endif() -endfunction() diff --git a/libs/eigen/cmake/EigenDetermineVSServicePack.cmake b/libs/eigen/cmake/EigenDetermineVSServicePack.cmake deleted file mode 100644 index fed7819..0000000 --- a/libs/eigen/cmake/EigenDetermineVSServicePack.cmake +++ /dev/null @@ -1,41 +0,0 @@ -include(CMakeDetermineVSServicePack) - -# The code is almost identical to the CMake version. The only difference is that we remove -# _DetermineVSServicePack_FastCheckVersionWithCompiler which lead to errors on some systems. 
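The service-pack probe deleted below is superseded by CMake's built-in version reporting: the ei_get_compilerver hunk later in this diff simply returns CMAKE_CXX_COMPILER_VERSION on MSVC. A sketch of the reduced branch, with the non-MSVC cases omitted:

    # inside ei_get_compilerver(VAR), after this patch
    if(MSVC)
      set(${VAR} "${CMAKE_CXX_COMPILER_VERSION}")   # e.g. a 19.x toolset version string
    endif()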
-function(EigenDetermineVSServicePack _pack) - if(NOT DETERMINED_VS_SERVICE_PACK OR NOT ${_pack}) - if(NOT DETERMINED_VS_SERVICE_PACK) - _DetermineVSServicePack_CheckVersionWithTryCompile(DETERMINED_VS_SERVICE_PACK _cl_version) - if(NOT DETERMINED_VS_SERVICE_PACK) - _DetermineVSServicePack_CheckVersionWithTryRun(DETERMINED_VS_SERVICE_PACK _cl_version) - endif() - endif() - - if(DETERMINED_VS_SERVICE_PACK) - if(_cl_version) - # Call helper function to determine VS version - _DetermineVSServicePackFromCompiler(_sp "${_cl_version}") - - # temporary fix, until CMake catches up - if (NOT _sp) - if(${_cl_version} VERSION_EQUAL "17.00.50727.1") - set(_sp "vc110") - elseif(${_cl_version} VERSION_EQUAL "17.00.51106.1") - set(_sp "vc110sp1") - elseif(${_cl_version} VERSION_EQUAL "17.00.60315.1") - set(_sp "vc110sp2") - elseif(${_cl_version} VERSION_EQUAL "17.00.60610.1") - set(_sp "vc110sp3") - else() - set(_sp ${CMAKE_CXX_COMPILER_VERSION}) - endif() - endif() - - if(_sp) - set(${_pack} ${_sp} CACHE INTERNAL - "The Visual Studio Release with Service Pack") - endif() - endif() - endif() - endif() -endfunction() diff --git a/libs/eigen/cmake/EigenSmokeTestList.cmake b/libs/eigen/cmake/EigenSmokeTestList.cmake index 6f0f724..db7d3ff 100644 --- a/libs/eigen/cmake/EigenSmokeTestList.cmake +++ b/libs/eigen/cmake/EigenSmokeTestList.cmake @@ -61,6 +61,9 @@ set(ei_smoke_test_list mapped_matrix_1 mapstaticmethods_1 mapstride_1 + unaryviewstride_1 + unaryviewstride_2 + unaryviewstride_3 matrix_square_root_1 meta minres_2 @@ -100,6 +103,7 @@ set(ei_smoke_test_list sizeof sizeoverflow smallvectors + sparse_basic_1 sparse_basic_3 sparse_block_1 sparse_extra_4 @@ -128,4 +132,5 @@ set(ei_smoke_test_list unalignedassert unalignedcount vectorwiseop_1 - visitor_1) \ No newline at end of file + visitor_1 + vectorization_logic_1) diff --git a/libs/eigen/cmake/EigenTesting.cmake b/libs/eigen/cmake/EigenTesting.cmake index eb8457d..1ddaa12 100644 --- a/libs/eigen/cmake/EigenTesting.cmake +++ b/libs/eigen/cmake/EigenTesting.cmake @@ -23,10 +23,14 @@ macro(ei_add_test_internal testname testname_with_suffix) set(EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}${targetname}\n") set_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}") + set(is_gpu_test OFF) if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) + set(is_gpu_test ON) if(EIGEN_TEST_HIP) hip_reset_flags() - hip_add_executable(${targetname} ${filename} HIPCC_OPTIONS "-DEIGEN_USE_HIP ${ARGV2}") + hip_add_executable(${targetname} ${filename} HIPCC_OPTIONS -std=c++14) + target_compile_definitions(${targetname} PRIVATE -DEIGEN_USE_HIP) + set_property(TARGET ${targetname} PROPERTY HIP_ARCHITECTURES gfx900 gfx906 gfx908 gfx90a gfx1030) elseif(EIGEN_TEST_CUDA_CLANG) set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX) @@ -36,54 +40,46 @@ macro(ei_add_test_internal testname testname_with_suffix) link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") endif() - if (${ARGC} GREATER 2) - add_executable(${targetname} ${filename}) - else() - add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) - endif() + add_executable(${targetname} ${filename}) set(CUDA_CLANG_LINK_LIBRARIES "cudart_static" "cuda" "dl" "pthread") if (CMAKE_SYSTEM_NAME STREQUAL "Linux") set(CUDA_CLANG_LINK_LIBRARIES ${CUDA_CLANG_LINK_LIBRARIES} "rt") endif() target_link_libraries(${targetname} ${CUDA_CLANG_LINK_LIBRARIES}) else() - if (${ARGC} GREATER 2) - cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) - else() - cuda_add_executable(${targetname} ${filename}) - endif() + 
cuda_add_executable(${targetname} ${filename}) endif() else() add_executable(${targetname} ${filename}) endif() - if (targetname MATCHES "^eigen2_") - add_dependencies(eigen2_buildtests ${targetname}) - else() - add_dependencies(buildtests ${targetname}) + add_dependencies(buildtests ${targetname}) + + if (is_gpu_test) + add_dependencies(buildtests_gpu ${targetname}) endif() if(EIGEN_NO_ASSERTION_CHECKING) - ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1") + target_compile_definitions(${targetname} PRIVATE EIGEN_NO_ASSERTION_CHECKING=1) else() if(EIGEN_DEBUG_ASSERTS) - ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1") + target_compile_definitions(${targetname} PRIVATE EIGEN_DEBUG_ASSERTS=1) endif() endif() - ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}") + target_compile_definitions(${targetname} PRIVATE EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}) if(MSVC) - ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj") + target_compile_options(${targetname} PRIVATE "/bigobj") endif() # let the user pass flags. if(${ARGC} GREATER 2) - ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") + target_compile_options(${targetname} PRIVATE ${ARGV2}) endif() if(EIGEN_TEST_CUSTOM_CXX_FLAGS) - ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") + target_compile_options(${targetname} PRIVATE ${EIGEN_TEST_CUSTOM_CXX_FLAGS}) endif() if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) @@ -118,21 +114,14 @@ macro(ei_add_test_internal testname testname_with_suffix) add_dependencies("Build${current_subproject}" ${targetname}) set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}") endif() + if (is_gpu_test) + # Add gpu tag for testing only GPU tests. + set_property(TEST ${testname_with_suffix} APPEND PROPERTY LABELS "gpu") + endif() + if(EIGEN_SYCL) # Force include of the SYCL file at the end to avoid errors. set_property(TARGET ${targetname} PROPERTY COMPUTECPP_INCLUDE_AFTER 1) - # Set COMPILE_FLAGS to COMPILE_DEFINITIONS instead to avoid having to duplicate the flags - # to the device compiler. 
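The replacements in this hunk all follow one pattern: string-spliced COMPILE_FLAGS properties give way to per-target commands, which keep definitions and options as proper CMake lists and make workarounds like the flag-splitting loop removed below unnecessary. A minimal sketch of the pattern, using a hypothetical target:

    add_executable(mytest mytest.cpp)                                     # placeholder test target
    target_compile_definitions(mytest PRIVATE EIGEN_TEST_MAX_SIZE=320)    # a leading -D would be stripped automatically
    if(MSVC)
      target_compile_options(mytest PRIVATE "/bigobj")
    endif()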
- get_target_property(target_compile_flags ${targetname} COMPILE_FLAGS) - separate_arguments(target_compile_flags) - foreach(flag ${target_compile_flags}) - if(${flag} MATCHES "^-D.*") - string(REPLACE "-D" "" definition_flag ${flag}) - set_property(TARGET ${targetname} APPEND PROPERTY COMPILE_DEFINITIONS ${definition_flag}) - list(REMOVE_ITEM target_compile_flags ${flag}) - endif() - endforeach() - set_property(TARGET ${targetname} PROPERTY COMPILE_FLAGS ${target_compile_flags}) # Link against pthread and add sycl to target set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) @@ -209,12 +198,13 @@ macro(ei_add_test testname) if( (EIGEN_SPLIT_LARGE_TESTS AND suffixes) OR explicit_suffixes) add_custom_target(${testname}) foreach(suffix ${suffixes}) - ei_add_test_internal(${testname} ${testname}_${suffix} - "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}") + ei_add_test_internal(${testname} ${testname}_${suffix} "${ARGV1}" "${ARGV2}") add_dependencies(${testname} ${testname}_${suffix}) + target_compile_definitions(${testname}_${suffix} PRIVATE -DEIGEN_TEST_PART_${suffix}=1) endforeach() else() - ei_add_test_internal(${testname} ${testname} "${ARGV1} -DEIGEN_TEST_PART_ALL=1" "${ARGV2}") + ei_add_test_internal(${testname} ${testname} "${ARGV1}" "${ARGV2}") + target_compile_definitions(${testname} PRIVATE -DEIGEN_TEST_PART_ALL=1) endif() endmacro() @@ -375,12 +365,6 @@ macro(ei_testing_print_summary) message(STATUS "S390X ZVECTOR: Using architecture defaults") endif() - if(EIGEN_TEST_CXX11) - message(STATUS "C++11: ON") - else() - message(STATUS "C++11: OFF") - endif() - if(EIGEN_TEST_SYCL) if(EIGEN_SYCL_TRISYCL) message(STATUS "SYCL: ON (using triSYCL)") @@ -455,15 +439,7 @@ endmacro() macro(ei_get_compilerver VAR) if(MSVC) - # on windows system, we use a modified CMake script - include(EigenDetermineVSServicePack) - EigenDetermineVSServicePack( my_service_pack ) - - if( my_service_pack ) - set(${VAR} ${my_service_pack}) - else() - set(${VAR} "na") - endif() + set(${VAR} "${CMAKE_CXX_COMPILER_VERSION}") elseif(${CMAKE_CXX_COMPILER_ID} MATCHES "PGI") set(${VAR} "${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}") else() @@ -598,10 +574,7 @@ macro(ei_set_build_string) ei_get_compilerver(LOCAL_COMPILER_VERSION) ei_get_cxxflags(LOCAL_COMPILER_FLAGS) - include(EigenDetermineOSVersion) - DetermineOSVersion(OS_VERSION) - - set(TMP_BUILD_STRING ${OS_VERSION}-${LOCAL_COMPILER_VERSION}) + set(TMP_BUILD_STRING ${CMAKE_SYSTEM}-${LOCAL_COMPILER_VERSION}) if (NOT ${LOCAL_COMPILER_FLAGS} STREQUAL "") set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${LOCAL_COMPILER_FLAGS}) @@ -618,10 +591,6 @@ macro(ei_set_build_string) set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit) endif() - if(EIGEN_TEST_CXX11) - set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11) - endif() - if(EIGEN_BUILD_STRING_SUFFIX) set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX}) endif() @@ -671,8 +640,8 @@ endmacro() # Split all tests listed in EIGEN_TESTS_LIST into num_splits many targets # named buildtestspartN with N = { 0, ..., num_splits-1}. # -# The intention behind the existance of this macro is the size of Eigen's -# testsuite. Together with the relativly big compile-times building all tests +# The intention behind the existence of this macro is the size of Eigen's +# testsuite. Together with the relatively big compile-times building all tests # can take a substantial amount of time depending on the available hardware. # # The last buildtestspartN target will build possible remaining tests. 
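Label plumbing ties the testing changes together: targets and tests are tagged (gpu above, smoketest just below), and both the new check_gpu target and the CI scripts then narrow the run with ctest -L. A sketch of the mechanism for a hypothetical test:

    add_test(NAME gpu_basic COMMAND gpu_basic)                  # placeholder GPU test
    set_property(TEST gpu_basic APPEND PROPERTY LABELS "gpu")   # same APPEND pattern as this patch
    # selected later with: ctest -L gpu   (what check_gpu invokes)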
@@ -775,8 +744,7 @@ macro(ei_add_smoke_tests smoke_test_list) if ("${test}" IN_LIST EIGEN_SUBTESTS_LIST) add_dependencies("${buildtarget}" "${test}") # Add label smoketest to be able to run smoketests using ctest - get_property(test_labels TEST ${test} PROPERTY LABELS) - set_property(TEST ${test} PROPERTY LABELS "${test_labels};smoketest") + set_property(TEST ${test} APPEND PROPERTY LABELS "smoketest") endif() endforeach() endmacro(ei_add_smoke_tests) diff --git a/libs/eigen/cmake/FindAccelerate.cmake b/libs/eigen/cmake/FindAccelerate.cmake new file mode 100644 index 0000000..787c31c --- /dev/null +++ b/libs/eigen/cmake/FindAccelerate.cmake @@ -0,0 +1,28 @@ +if (Accelerate_INCLUDES AND Accelerate_LIBRARIES) + set(Accelerate_FIND_QUIETLY TRUE) +endif () + +find_path(Accelerate_INCLUDES + NAMES + Accelerate.h + PATHS $ENV{ACCELERATEDIR} +) + +find_library(Accelerate_LIBRARIES Accelerate PATHS $ENV{ACCELERATEDIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Accelerate DEFAULT_MSG + Accelerate_INCLUDES Accelerate_LIBRARIES) + +if (Accelerate_FOUND) + get_filename_component(Accelerate_PARENTDIR ${Accelerate_INCLUDES} DIRECTORY) + + file(GLOB_RECURSE SparseHeader ${Accelerate_PARENTDIR}/Sparse.h) + + if ("${SparseHeader}" STREQUAL "") + message(STATUS "Accelerate sparse matrix support was not found. Accelerate has been disabled.") + set(Accelerate_FOUND FALSE) + endif () +endif () + +mark_as_advanced(Accelerate_INCLUDES Accelerate_LIBRARIES) diff --git a/libs/eigen/cmake/FindBLAS.cmake b/libs/eigen/cmake/FindBLAS.cmake deleted file mode 100644 index 1bb8f19..0000000 --- a/libs/eigen/cmake/FindBLAS.cmake +++ /dev/null @@ -1,1407 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2016 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find BLAS library -# This module finds an installed fortran library that implements the BLAS -# linear-algebra interface (see http://www.netlib.org/blas/). -# The list of libraries searched for is taken -# from the autoconf macro file, acx_blas.m4 (distributed at -# http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). -# -# This module sets the following variables: -# BLAS_FOUND - set to true if a library implementing the BLAS interface -# is found -# BLAS_LINKER_FLAGS - uncached list of required linker flags (excluding -l -# and -L). -# BLAS_COMPILER_FLAGS - uncached list of required compiler flags (including -I for mkl headers). 
-# BLAS_LIBRARIES - uncached list of libraries (using full path name) to -# link against to use BLAS -# BLAS95_LIBRARIES - uncached list of libraries (using full path name) -# to link against to use BLAS95 interface -# BLAS95_FOUND - set to true if a library implementing the BLAS f95 interface -# is found -# BLA_STATIC if set on this determines what kind of linkage we do (static) -# BLA_VENDOR if set checks only the specified vendor, if not set checks -# all the possibilities -# BLAS_VENDOR_FOUND stores the BLAS vendor found -# BLA_F95 if set on tries to find the f95 interfaces for BLAS/LAPACK -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DBLAS_DIR=path/to/blas): -# BLAS_DIR - Where to find the base directory of blas -# BLAS_INCDIR - Where to find the header files -# BLAS_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: BLAS_DIR, BLAS_INCDIR, BLAS_LIBDIR -# For MKL case and if no paths are given as hints, we will try to use the MKLROOT -# environment variable -# BLAS_VERBOSE Print some additional information during BLAS libraries detection -########## -### List of vendors (BLA_VENDOR) valid in this module -########## List of vendors (BLA_VENDOR) valid in this module -## Open (for OpenBlas), Eigen (for EigenBlas), Goto, ATLAS PhiPACK, -##  CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT -## Intel10_32 (intel mkl v10 32 bit), Intel10_64lp (intel mkl v10 64 bit,lp thread model, lp64 model), -## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model), -## Intel( older versions of mkl 32 and 64 bit), -##  ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic -# C/CXX should be enabled to use Intel mkl -### -# We handle different modes to find the dependency -# -# - Detection if already installed on the system -# - BLAS libraries can be detected from different ways -# Here is the order of precedence: -# 1) we look in cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined -# 2) we look in environment variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined -# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) -# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: -# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES -# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES -# - -#============================================================================= -# Copyright 2007-2009 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) 
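Everything from here to the end of the file is the vendor-by-vendor search logic being deleted, and the patch adds no in-tree replacement; builds that need BLAS presumably fall back to the FindBLAS module shipped with CMake itself. A hedged sketch of that fallback from a consumer's perspective (myapp is a placeholder):

    set(BLA_VENDOR OpenBLAS)                 # optional vendor hint recognized by CMake's FindBLAS
    find_package(BLAS)
    if(BLAS_FOUND)
      target_link_libraries(myapp PRIVATE BLAS::BLAS)  # imported target available since CMake 3.18
    endif()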
- -## Some macros to print status when search for headers and libs -# This macro informs why the _lib_to_find file has not been found -macro(Print_Find_Library_Blas_Status _libname _lib_to_find) - - # save _libname upper/lower case - string(TOUPPER ${_libname} LIBNAME) - string(TOLOWER ${_libname} libname) - - # print status - #message(" ") - if(${LIBNAME}_LIBDIR) - message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}" - "has not been found in ${ARGN}${ColourReset}") - else() - if(${LIBNAME}_DIR) - message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}" - "has not been found in ${ARGN}${ColourReset}") - else() - message("${Yellow}${_lib_to_find} not found." - "Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR" - "are defined so that we look for ${_lib_to_find} in" - "system paths (Linux: LD_LIBRARY_PATH, Windows: LIB," - "Mac: DYLD_LIBRARY_PATH," - "CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES," - "CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}") - if(_lib_env) - message("${Yellow}${_lib_to_find} has not been found in" - "${_lib_env}${ColourReset}") - endif() - endif() - endif() - message("${BoldYellow}Please indicate where to find ${_lib_to_find}. You have three options:\n" - "- Option 1: Provide the Installation directory of BLAS library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" - "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n" - "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" - "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") - -endmacro() - -# This macro informs why the _lib_to_find file has not been found -macro(Print_Find_Library_Blas_CheckFunc_Status _name) - - # save _libname upper/lower case - string(TOUPPER ${_name} FUNCNAME) - string(TOLOWER ${_name} funcname) - - # print status - #message(" ") - message("${Red}Libs have been found but check of symbol ${_name} failed " - "with following libraries ${ARGN}${ColourReset}") - message("${BoldRed}Please open your error file CMakeFiles/CMakeError.log" - "to figure out why it fails${ColourReset}") - #message(" ") - -endmacro() - -if (NOT BLAS_FOUND) - set(BLAS_DIR "" CACHE PATH "Installation directory of BLAS library") - if (NOT BLAS_FIND_QUIETLY) - message(STATUS "A cache variable, namely BLAS_DIR, has been set to specify the install directory of BLAS") - endif() -endif() - -option(BLAS_VERBOSE "Print some additional information during BLAS libraries detection" OFF) -mark_as_advanced(BLAS_VERBOSE) - -include(CheckFunctionExists) -include(CheckFortranFunctionExists) -include(CMakeFindDependencyMacro) - -set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) - -# Check the language being used -get_property( _LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES ) -if( _LANGUAGES_ MATCHES Fortran AND CMAKE_Fortran_COMPILER) - set( _CHECK_FORTRAN TRUE ) -elseif( (_LANGUAGES_ MATCHES C) OR (_LANGUAGES_ MATCHES CXX) ) - set( _CHECK_FORTRAN FALSE ) -else() - if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.") - else() - message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)") - return() - endif() -endif() - -macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread) - # This macro checks for the existence of the combination of fortran libraries - # given by _list. 
If the combination is found, this macro checks (using the - # Check_Fortran_Function_Exists macro) whether can link against that library - # combination using the name of a routine given by _name using the linker - # flags given by _flags. If the combination of libraries is found and passes - # the link test, LIBRARIES is set to the list of complete library paths that - # have been found. Otherwise, LIBRARIES is set to FALSE. - - # N.B. _prefix is the prefix applied to the names of all cached variables that - # are generated internally and marked advanced by this macro. - - set(_libdir ${ARGN}) - - set(_libraries_work TRUE) - set(${LIBRARIES}) - set(_combined_name) - set(ENV_MKLROOT "$ENV{MKLROOT}") - set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") - set(ENV_BLAS_LIBDIR "$ENV{BLAS_LIBDIR}") - if (NOT _libdir) - if (BLAS_LIBDIR) - list(APPEND _libdir "${BLAS_LIBDIR}") - elseif (BLAS_DIR) - list(APPEND _libdir "${BLAS_DIR}") - list(APPEND _libdir "${BLAS_DIR}/lib") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - list(APPEND _libdir "${BLAS_DIR}/lib64") - list(APPEND _libdir "${BLAS_DIR}/lib/intel64") - else() - list(APPEND _libdir "${BLAS_DIR}/lib32") - list(APPEND _libdir "${BLAS_DIR}/lib/ia32") - endif() - elseif(ENV_BLAS_LIBDIR) - list(APPEND _libdir "${ENV_BLAS_LIBDIR}") - elseif(ENV_BLAS_DIR) - list(APPEND _libdir "${ENV_BLAS_DIR}") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib64") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib/intel64") - else() - list(APPEND _libdir "${ENV_BLAS_DIR}/lib32") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib/ia32") - endif() - else() - if (ENV_MKLROOT) - list(APPEND _libdir "${ENV_MKLROOT}/lib") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - list(APPEND _libdir "${ENV_MKLROOT}/lib64") - list(APPEND _libdir "${ENV_MKLROOT}/lib/intel64") - else() - list(APPEND _libdir "${ENV_MKLROOT}/lib32") - list(APPEND _libdir "${ENV_MKLROOT}/lib/ia32") - endif() - endif() - if (WIN32) - string(REPLACE ":" ";" _libdir2 "$ENV{LIB}") - elseif (APPLE) - string(REPLACE ":" ";" _libdir2 "$ENV{DYLD_LIBRARY_PATH}") - else () - string(REPLACE ":" ";" _libdir2 "$ENV{LD_LIBRARY_PATH}") - endif () - list(APPEND _libdir "${_libdir2}") - list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() - endif () - - if (BLAS_VERBOSE) - message("${Cyan}Try to find BLAS libraries: ${_list}") - endif () - - foreach(_library ${_list}) - set(_combined_name ${_combined_name}_${_library}) - - if(_libraries_work) - if (BLA_STATIC) - if (WIN32) - set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) - endif () - if (APPLE) - set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) - else () - set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) - endif () - else () - if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - # for ubuntu's libblas3gf and liblapack3gf packages - set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) - endif () - endif () - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - HINTS ${_libdir} - NO_DEFAULT_PATH - ) - mark_as_advanced(${_prefix}_${_library}_LIBRARY) - # Print status if not found - # ------------------------- - if (NOT ${_prefix}_${_library}_LIBRARY AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) - Print_Find_Library_Blas_Status(blas ${_library} ${_libdir}) - endif () - set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_work 
${${_prefix}_${_library}_LIBRARY}) - endif() - endforeach() - - if(_libraries_work) - # Test this combination of libraries. - if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND BLA_STATIC) - list(INSERT ${LIBRARIES} 0 "-Wl,--start-group") - list(APPEND ${LIBRARIES} "-Wl,--end-group") - endif() - set(CMAKE_REQUIRED_LIBRARIES "${_flags};${${LIBRARIES}};${_thread}") - set(CMAKE_REQUIRED_FLAGS "${BLAS_COMPILER_FLAGS}") - if (BLAS_VERBOSE) - message("${Cyan}BLAS libs found for BLA_VENDOR ${BLA_VENDOR}." - "Try to compile symbol ${_name} with following libraries:" - "${CMAKE_REQUIRED_LIBRARIES}") - endif () - if(NOT BLAS_FOUND) - unset(${_prefix}${_combined_name}_WORKS CACHE) - endif() - if (_CHECK_FORTRAN) - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - string(REPLACE "mkl_intel_lp64" "mkl_gf_lp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - string(REPLACE "mkl_intel_ilp64" "mkl_gf_ilp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - endif() - check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) - else() - check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) - endif() - mark_as_advanced(${_prefix}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) - # Print status if not found - # ------------------------- - if (NOT _libraries_work AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) - Print_Find_Library_Blas_CheckFunc_Status(${_name} ${CMAKE_REQUIRED_LIBRARIES}) - endif () - set(CMAKE_REQUIRED_LIBRARIES) - endif() - - if(_libraries_work) - set(${LIBRARIES} ${${LIBRARIES}} ${_thread}) - else() - set(${LIBRARIES} FALSE) - endif() - -endmacro() - - -set(BLAS_LINKER_FLAGS) -set(BLAS_LIBRARIES) -set(BLAS95_LIBRARIES) -if ($ENV{BLA_VENDOR} MATCHES ".+") - set(BLA_VENDOR $ENV{BLA_VENDOR}) -else () - if(NOT BLA_VENDOR) - set(BLA_VENDOR "All") - endif() -endif () - -#BLAS in intel mkl 10 library? 
(em64t 64bit) -if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES OR BLA_VENDOR MATCHES "Intel*") - # Looking for include - # ------------------- - - # Add system include paths to search include - # ------------------------------------------ - unset(_inc_env) - set(ENV_MKLROOT "$ENV{MKLROOT}") - set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") - set(ENV_BLAS_INCDIR "$ENV{BLAS_INCDIR}") - if(ENV_BLAS_INCDIR) - list(APPEND _inc_env "${ENV_BLAS_INCDIR}") - elseif(ENV_BLAS_DIR) - list(APPEND _inc_env "${ENV_BLAS_DIR}") - list(APPEND _inc_env "${ENV_BLAS_DIR}/include") - else() - if (ENV_MKLROOT) - list(APPEND _inc_env "${ENV_MKLROOT}/include") - endif() - # system variables - if(WIN32) - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() - endif() - list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") - list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") - list(REMOVE_DUPLICATES _inc_env) - - # set paths where to look for - set(PATH_TO_LOOK_FOR "${_inc_env}") - - # Try to find the fftw header in the given paths - # ------------------------------------------------- - # call cmake macro to find the header path - if(BLAS_INCDIR) - set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_DIRS - NAMES mkl.h - HINTS ${BLAS_INCDIR}) - else() - if(BLAS_DIR) - set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_DIRS - NAMES mkl.h - HINTS ${BLAS_DIR} - PATH_SUFFIXES "include") - else() - set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_DIRS - NAMES mkl.h - HINTS ${PATH_TO_LOOK_FOR}) - endif() - endif() - mark_as_advanced(BLAS_mkl.h_DIRS) - - # If found, add path to cmake variable - # ------------------------------------ - if (BLAS_mkl.h_DIRS) - set(BLAS_INCLUDE_DIRS "${BLAS_mkl.h_DIRS}") - else () - set(BLAS_INCLUDE_DIRS "BLAS_INCLUDE_DIRS-NOTFOUND") - if(NOT BLAS_FIND_QUIETLY) - message(STATUS "Looking for BLAS -- mkl.h not found") - endif() - endif() - - if (WIN32) - string(REPLACE ":" ";" _libdir "$ENV{LIB}") - elseif (APPLE) - string(REPLACE ":" ";" _libdir "$ENV{DYLD_LIBRARY_PATH}") - else () - string(REPLACE ":" ";" _libdir "$ENV{LD_LIBRARY_PATH}") - endif () - list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - # libiomp5 - # -------- - set(OMP_iomp5_LIBRARY "OMP_iomp5_LIBRARY-NOTFOUND") - find_library(OMP_iomp5_LIBRARY - NAMES iomp5 - HINTS ${_libdir} - ) - mark_as_advanced(OMP_iomp5_LIBRARY) - set(OMP_LIB "") - # libgomp - # ------- - set(OMP_gomp_LIBRARY "OMP_gomp_LIBRARY-NOTFOUND") - find_library(OMP_gomp_LIBRARY - NAMES gomp - HINTS ${_libdir} - ) - mark_as_advanced(OMP_gomp_LIBRARY) - # choose one or another depending on the compilo - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - if (OMP_gomp_LIBRARY) - set(OMP_LIB "${OMP_gomp_LIBRARY}") - endif() - else() - if (OMP_iomp5_LIBRARY) - set(OMP_LIB "${OMP_iomp5_LIBRARY}") - endif() - endif() - - if (UNIX AND NOT WIN32) - # m - find_library(M_LIBRARY - NAMES m - HINTS ${_libdir}) - mark_as_advanced(M_LIBRARY) - if(M_LIBRARY) - set(LM "-lm") - 
else() - set(LM "") - endif() - # Fortran - set(LGFORTRAN "") - if (CMAKE_C_COMPILER_ID MATCHES "GNU") - find_library( - FORTRAN_gfortran_LIBRARY - NAMES gfortran - HINTS ${_libdir} - ) - mark_as_advanced(FORTRAN_gfortran_LIBRARY) - if (FORTRAN_gfortran_LIBRARY) - set(LGFORTRAN "${FORTRAN_gfortran_LIBRARY}") - endif() - elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") - find_library( - FORTRAN_ifcore_LIBRARY - NAMES ifcore - HINTS ${_libdir} - ) - mark_as_advanced(FORTRAN_ifcore_LIBRARY) - if (FORTRAN_ifcore_LIBRARY) - set(LGFORTRAN "{FORTRAN_ifcore_LIBRARY}") - endif() - endif() - set(BLAS_COMPILER_FLAGS "") - if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") - if (CMAKE_C_COMPILER_ID STREQUAL "Intel") - list(APPEND BLAS_COMPILER_FLAGS "-openmp") - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - list(APPEND BLAS_COMPILER_FLAGS "-fopenmp") - endif() - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - if (BLA_VENDOR STREQUAL "Intel10_32") - list(APPEND BLAS_COMPILER_FLAGS "-m32") - else() - list(APPEND BLAS_COMPILER_FLAGS "-m64") - endif() - if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") - list(APPEND OMP_LIB "-ldl") - endif() - if (ENV_MKLROOT) - list(APPEND BLAS_COMPILER_FLAGS "-I${ENV_MKLROOT}/include") - endif() - endif() - - set(additional_flags "") - if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") - set(additional_flags "-Wl,--no-as-needed") - endif() - endif () - - if (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX) - if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) - find_dependency(Threads) - else() - find_dependency(Threads REQUIRED) - endif() - - set(BLAS_SEARCH_LIBS "") - - if(BLA_F95) - - set(BLAS_mkl_SEARCH_SYMBOL SGEMM) - set(_LIBRARIES BLAS95_LIBRARIES) - if (WIN32) - if (BLA_STATIC) - set(BLAS_mkl_DLL_SUFFIX "") - else() - set(BLAS_mkl_DLL_SUFFIX "_dll") - endif() - - # Find the main file (32-bit or 64-bit) - set(BLAS_SEARCH_LIBS_WIN_MAIN "") - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") - endif() - if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_blas95_lp64${BLAS_mkl_DLL_SUFFIX} mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") - endif () - - # Add threading/sequential libs - set(BLAS_SEARCH_LIBS_WIN_THREAD "") - if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") - endif() - if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - # mkl >= 10.3 - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - endif() - - # Cartesian product of the above - foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) - foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) - list(APPEND BLAS_SEARCH_LIBS - "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") - endforeach() - endforeach() - else () - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95 mkl_intel mkl_intel_thread mkl_core guide") - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95 mkl_intel_lp64 mkl_intel_thread mkl_core guide") - # mkl >= 10.3 - if (CMAKE_C_COMPILER_ID STREQUAL "Intel") - list(APPEND BLAS_SEARCH_LIBS - 
"mkl_blas95_lp64 mkl_intel_lp64 mkl_intel_thread mkl_core") - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95_lp64 mkl_intel_lp64 mkl_gnu_thread mkl_core") - endif() - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_sequential mkl_core") - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") - set(OMP_LIB "") - endif() - endif () - endif () - - else () - - set(BLAS_mkl_SEARCH_SYMBOL sgemm) - set(_LIBRARIES BLAS_LIBRARIES) - if (WIN32) - if (BLA_STATIC) - set(BLAS_mkl_DLL_SUFFIX "") - else() - set(BLAS_mkl_DLL_SUFFIX "_dll") - endif() - - # Find the main file (32-bit or 64-bit) - set(BLAS_SEARCH_LIBS_WIN_MAIN "") - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") - endif() - if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") - endif () - - # Add threading/sequential libs - set(BLAS_SEARCH_LIBS_WIN_THREAD "") - if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - # mkl >= 10.3 - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - endif() - if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") - endif() - - # Cartesian product of the above - foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) - foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) - list(APPEND BLAS_SEARCH_LIBS - "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") - endforeach() - endforeach() - else () - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel mkl_intel_thread mkl_core guide") - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_intel_thread mkl_core guide") - # mkl >= 10.3 - if (CMAKE_C_COMPILER_ID STREQUAL "Intel") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_intel_thread mkl_core") - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_gnu_thread mkl_core") - endif() - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_sequential mkl_core") - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") - set(OMP_LIB "") - endif() - endif () - #older vesions of intel mkl libs - if (BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl") - list(APPEND BLAS_SEARCH_LIBS - "mkl_ia32") - list(APPEND BLAS_SEARCH_LIBS - "mkl_em64t") - endif () - endif () - - endif () - - foreach (IT ${BLAS_SEARCH_LIBS}) - string(REPLACE " " ";" SEARCH_LIBS ${IT}) - if (${_LIBRARIES}) - else () - check_fortran_libraries( - ${_LIBRARIES} - BLAS - ${BLAS_mkl_SEARCH_SYMBOL} - "${additional_flags}" - "${SEARCH_LIBS}" - "${OMP_LIB};${CMAKE_THREAD_LIBS_INIT};${LM}" - ) - if(_LIBRARIES) - set(BLAS_LINKER_FLAGS "${additional_flags}") - endif() - endif() - endforeach () - if(NOT BLAS_FIND_QUIETLY) - if(${_LIBRARIES}) - message(STATUS "Looking for MKL BLAS: found") - else() - message(STATUS "Looking for MKL BLAS: not found") - endif() - endif() - if 
(${_LIBRARIES} AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Intel MKL") - endif() - endif () - endif() -endif () - - -if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # gotoblas (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "goto2" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Goto BLAS: found") - else() - message(STATUS "Looking for Goto BLAS: not found") - endif() - endif() - endif() - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Goto") - endif() - -endif () - - -# OpenBlas -if (BLA_VENDOR STREQUAL "Open" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # openblas (http://www.openblas.net/) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "openblas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Open BLAS: found") - else() - message(STATUS "Looking for Open BLAS: not found") - endif() - endif() - endif() - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Openblas") - endif() - -endif () - - -# EigenBlas -if (BLA_VENDOR STREQUAL "Eigen" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "eigen_blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - message(STATUS "Looking for Eigen BLAS: found") - else() - message(STATUS "Looking for Eigen BLAS: not found") - endif() - endif() - endif() - - if(NOT BLAS_LIBRARIES) - # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "eigen_blas_static" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Eigen BLAS: found") - else() - message(STATUS "Looking for Eigen BLAS: not found") - endif() - endif() - endif() - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Eigen") - endif() - -endif () - - -if (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - dgemm - "" - "f77blas;atlas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Atlas BLAS: found") - else() - message(STATUS "Looking for Atlas BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Atlas") - endif() - -endif () - - -# BLAS in PhiPACK libraries? (requires generic BLAS lib, too) -if (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "sgemm;dgemm;blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for PhiPACK BLAS: found") - else() - message(STATUS "Looking for PhiPACK BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "PhiPACK") - endif() - -endif () - - -# BLAS in Alpha CXML library? 
-if (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "cxml" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for CXML BLAS: found") - else() - message(STATUS "Looking for CXML BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "CXML") - endif() - -endif () - - -# BLAS in Alpha DXML library? (now called CXML, see above) -if (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "dxml" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for DXML BLAS: found") - else() - message(STATUS "Looking for DXML BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "DXML") - endif() - -endif () - - -# BLAS in Sun Performance library? -if (BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "-xlic_lib=sunperf" - "sunperf;sunmath" - "" - ) - if(BLAS_LIBRARIES) - set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") - endif() - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for SunPerf BLAS: found") - else() - message(STATUS "Looking for SunPerf BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "SunPerf") - endif() - -endif () - - -# BLAS in SCSL library? (SGI/Cray Scientific Library) -if (BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "scsl" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for SCSL BLAS: found") - else() - message(STATUS "Looking for SCSL BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "SunPerf") - endif() - -endif () - - -# BLAS in SGIMATH library? 
-if (BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "complib.sgimath" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for SGIMATH BLAS: found") - else() - message(STATUS "Looking for SGIMATH BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "SGIMATH") - endif() - -endif () - - -# BLAS in IBM ESSL library (requires generic BLAS lib, too) -if (BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "essl;xlfmath;xlf90_r;blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for IBM ESSL BLAS: found") - else() - message(STATUS "Looking for IBM ESSL BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "IBM ESSL") - endif() - -endif () - -# BLAS in IBM ESSL_MT library (requires generic BLAS lib, too) -if (BLA_VENDOR STREQUAL "IBMESSLMT" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "esslsmp;xlsmp;xlfmath;xlf90_r;blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for IBM ESSL MT BLAS: found") - else() - message(STATUS "Looking for IBM ESSL MT BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "IBM ESSL MT") - endif() - -endif () - - -#BLAS in acml library? -if (BLA_VENDOR MATCHES "ACML.*" OR BLA_VENDOR STREQUAL "All") - - if( ((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR - ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR - ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS))) - - # try to find acml in "standard" paths - if( WIN32 ) - file( GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt" ) - else() - file( GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt" ) - endif() - if( WIN32 ) - file( GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples" ) - else() - file( GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples" ) - endif() - list(GET _ACML_ROOT 0 _ACML_ROOT) - list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT) - - if( _ACML_ROOT ) - - get_filename_component( _ACML_ROOT ${_ACML_ROOT} PATH ) - if( SIZEOF_INTEGER EQUAL 8 ) - set( _ACML_PATH_SUFFIX "_int64" ) - else() - set( _ACML_PATH_SUFFIX "" ) - endif() - if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) - set( _ACML_COMPILER32 "ifort32" ) - set( _ACML_COMPILER64 "ifort64" ) - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro" ) - set( _ACML_COMPILER32 "sun32" ) - set( _ACML_COMPILER64 "sun64" ) - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" ) - set( _ACML_COMPILER32 "pgi32" ) - if( WIN32 ) - set( _ACML_COMPILER64 "win64" ) - else() - set( _ACML_COMPILER64 "pgi64" ) - endif() - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "Open64" ) - # 32 bit builds not supported on Open64 but for code simplicity - # We'll just use the same directory twice - set( _ACML_COMPILER32 "open64_64" ) - set( _ACML_COMPILER64 "open64_64" ) - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "NAG" ) - set( _ACML_COMPILER32 "nag32" ) - set( _ACML_COMPILER64 "nag64" ) - else() - set( _ACML_COMPILER32 "gfortran32" ) - set( _ACML_COMPILER64 "gfortran64" ) - endif() - - if( BLA_VENDOR STREQUAL "ACML_MP" ) - set(_ACML_MP_LIB_DIRS - 
"${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib" - "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib" ) - else() - set(_ACML_LIB_DIRS - "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib" - "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib" ) - endif() - - endif() - - elseif(BLAS_${BLA_VENDOR}_LIB_DIRS) - - set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS}) - - endif() - - if( BLA_VENDOR STREQUAL "ACML_MP" ) - foreach( BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS}) - check_fortran_libraries ( - BLAS_LIBRARIES - BLAS - sgemm - "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS} - ) - if( BLAS_LIBRARIES ) - break() - endif() - endforeach() - elseif( BLA_VENDOR STREQUAL "ACML_GPU" ) - foreach( BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS}) - check_fortran_libraries ( - BLAS_LIBRARIES - BLAS - sgemm - "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS} - ) - if( BLAS_LIBRARIES ) - break() - endif() - endforeach() - else() - foreach( BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS} ) - check_fortran_libraries ( - BLAS_LIBRARIES - BLAS - sgemm - "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS} - ) - if( BLAS_LIBRARIES ) - break() - endif() - endforeach() - endif() - - # Either acml or acml_mp should be in LD_LIBRARY_PATH but not both - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "acml;acml_mv" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for ACML BLAS: found") - else() - message(STATUS "Looking for ACML BLAS: not found") - endif() - endif() - endif() - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "acml_mp;acml_mv" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for ACML BLAS: found") - else() - message(STATUS "Looking for ACML BLAS: not found") - endif() - endif() - endif() - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "acml;acml_mv;CALBLAS" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for ACML BLAS: found") - else() - message(STATUS "Looking for ACML BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "ACML") - endif() - -endif () # ACML - - -# Apple BLAS library? -if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - dgemm - "" - "Accelerate" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Apple BLAS: found") - else() - message(STATUS "Looking for Apple BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Apple Accelerate") - endif() - -endif () - - -if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") - - if ( NOT BLAS_LIBRARIES ) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - dgemm - "" - "vecLib" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for NAS BLAS: found") - else() - message(STATUS "Looking for NAS BLAS: not found") - endif() - endif() - endif () - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "NAS") - endif() - -endif () - - -# Generic BLAS library? 
-if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") - - set(BLAS_SEARCH_LIBS "blas;blas_LINUX;blas_MAC;blas_WINDOWS;refblas") - foreach (SEARCH_LIB ${BLAS_SEARCH_LIBS}) - if (BLAS_LIBRARIES) - else () - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "${SEARCH_LIB}" - "${LGFORTRAN}" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Generic BLAS: found") - else() - message(STATUS "Looking for Generic BLAS: not found") - endif() - endif() - endif() - endforeach () - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Netlib or other Generic libblas") - endif() - -endif () - - -if(BLA_F95) - - if(BLAS95_LIBRARIES) - set(BLAS95_FOUND TRUE) - else() - set(BLAS95_FOUND FALSE) - endif() - - if(NOT BLAS_FIND_QUIETLY) - if(BLAS95_FOUND) - message(STATUS "A library with BLAS95 API found.") - message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") - else() - message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas 95 libraries could not be found or check of symbols failed." - "\nPlease indicate where to find blas libraries. You have three options:\n" - "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" - "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" - "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" - "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." - "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." - "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," - "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," - "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") - if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR - "A required library with BLAS95 API not found. Please specify library location.") - else() - message(STATUS - "A library with BLAS95 API not found. Please specify library location.") - endif() - endif() - endif() - - set(BLAS_FOUND TRUE) - set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}") - -else() - - if(BLAS_LIBRARIES) - set(BLAS_FOUND TRUE) - else() - set(BLAS_FOUND FALSE) - endif() - - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_FOUND) - message(STATUS "A library with BLAS API found.") - message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") - else() - message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas libraries could not be found or check of symbols failed." - "\nPlease indicate where to find blas libraries. You have three options:\n" - "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" - "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" - "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" - "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." - "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." 
- "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," - "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," - "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") - if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR - "A required library with BLAS API not found. Please specify library location.") - else() - message(STATUS - "A library with BLAS API not found. Please specify library location.") - endif() - endif() - endif() - -endif() - -set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) - -if (BLAS_FOUND) - list(GET BLAS_LIBRARIES 0 first_lib) - get_filename_component(first_lib_path "${first_lib}" PATH) - if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") - string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") - set(BLAS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of BLAS library" FORCE) - else() - set(BLAS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of BLAS library" FORCE) - endif() -endif() -mark_as_advanced(BLAS_DIR) -mark_as_advanced(BLAS_DIR_FOUND) diff --git a/libs/eigen/cmake/FindCLANG_FORMAT.cmake b/libs/eigen/cmake/FindCLANG_FORMAT.cmake new file mode 100644 index 0000000..e00f19f --- /dev/null +++ b/libs/eigen/cmake/FindCLANG_FORMAT.cmake @@ -0,0 +1,61 @@ + + +# Find clang-format +# +# CLANG_FORMAT_EXECUTABLE - Path to clang-format executable +# CLANG_FORMAT_FOUND - True if the clang-format executable was found. +# CLANG_FORMAT_VERSION - The version of clang-format found +# +# Copyright 2009-2020 The VOTCA Development Team (http://www.votca.org) +# +# Licensed under the Mozilla Public License Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.mozilla.org/en-US/MPL/2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +find_program(CLANG_FORMAT_EXECUTABLE + NAMES + clang-format-9 + clang-format + clang-format-11 + clang-format-10 + clang-format-8 + clang-format-7 + + DOC "clang-format executable") +mark_as_advanced(CLANG_FORMAT_EXECUTABLE) + +# Extract version from command "clang-format -version" +if(CLANG_FORMAT_EXECUTABLE) + execute_process(COMMAND ${CLANG_FORMAT_EXECUTABLE} -version + OUTPUT_VARIABLE clang_format_version + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(clang_format_version MATCHES "^.*clang-format version .*") + # clang_format_version sample: "clang-format version 3.9.1-4ubuntu3~16.04.1 + # (tags/RELEASE_391/rc2)" + string(REGEX + REPLACE "^.*clang-format version ([.0-9]+).*" + "\\1" + CLANG_FORMAT_VERSION + "${clang_format_version}") + # CLANG_FORMAT_VERSION sample: "3.9.1" + else() + set(CLANG_FORMAT_VERSION 0.0) + endif() +else() + set(CLANG_FORMAT_VERSION 0.0) +endif() + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set CLANG_FORMAT_FOUND to TRUE +# if all listed variables are TRUE +find_package_handle_standard_args(CLANG_FORMAT REQUIRED_VARS CLANG_FORMAT_EXECUTABLE VERSION_VAR CLANG_FORMAT_VERSION) diff --git a/libs/eigen/cmake/FindComputeCpp.cmake b/libs/eigen/cmake/FindComputeCpp.cmake index 1c271f0..e200522 100644 --- a/libs/eigen/cmake/FindComputeCpp.cmake +++ b/libs/eigen/cmake/FindComputeCpp.cmake @@ -382,7 +382,7 @@ endfunction(__build_ir) ####################### # # Adds a SYCL compilation custom command associated with an existing -# target and sets a dependancy on that new command. +# target and sets a dependency on that new command. # # TARGET : Name of the target to add SYCL to. # SOURCES : Source files to be compiled for SYCL. diff --git a/libs/eigen/cmake/FindEigen2.cmake b/libs/eigen/cmake/FindEigen2.cmake deleted file mode 100644 index eb2709d..0000000 --- a/libs/eigen/cmake/FindEigen2.cmake +++ /dev/null @@ -1,80 +0,0 @@ -# - Try to find Eigen2 lib -# -# This module supports requiring a minimum version, e.g. you can do -# find_package(Eigen2 2.0.3) -# to require version 2.0.3 to newer of Eigen2. -# -# Once done this will define -# -# EIGEN2_FOUND - system has eigen lib with correct version -# EIGEN2_INCLUDE_DIR - the eigen include directory -# EIGEN2_VERSION - eigen version - -# Copyright (c) 2006, 2007 Montel Laurent, -# Copyright (c) 2008, 2009 Gael Guennebaud, -# Redistribution and use is allowed according to the terms of the BSD license. 
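# Note for downstream users: with FindEigen2.cmake and FindEigen3.cmake removed
# from the tree, a consuming project is expected to locate Eigen through the
# Eigen3Config.cmake package that Eigen's own build generates and installs.
# A minimal consumer sketch (the target name `app` and main.cpp are hypothetical):
#
#   find_package(Eigen3 3.4 REQUIRED NO_MODULE)
#   add_executable(app main.cpp)
#   target_link_libraries(app PRIVATE Eigen3::Eigen)
#
# Passing NO_MODULE forces config mode, so no Find module is needed at all.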
- -if(NOT Eigen2_FIND_VERSION) - if(NOT Eigen2_FIND_VERSION_MAJOR) - set(Eigen2_FIND_VERSION_MAJOR 2) - endif() - if(NOT Eigen2_FIND_VERSION_MINOR) - set(Eigen2_FIND_VERSION_MINOR 0) - endif() - if(NOT Eigen2_FIND_VERSION_PATCH) - set(Eigen2_FIND_VERSION_PATCH 0) - endif() - - set(Eigen2_FIND_VERSION "${Eigen2_FIND_VERSION_MAJOR}.${Eigen2_FIND_VERSION_MINOR}.${Eigen2_FIND_VERSION_PATCH}") -endif() - -macro(_eigen2_check_version) - file(READ "${EIGEN2_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen2_version_header) - - string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen2_world_version_match "${_eigen2_version_header}") - set(EIGEN2_WORLD_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen2_major_version_match "${_eigen2_version_header}") - set(EIGEN2_MAJOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen2_minor_version_match "${_eigen2_version_header}") - set(EIGEN2_MINOR_VERSION "${CMAKE_MATCH_1}") - - set(EIGEN2_VERSION ${EIGEN2_WORLD_VERSION}.${EIGEN2_MAJOR_VERSION}.${EIGEN2_MINOR_VERSION}) - if((${EIGEN2_WORLD_VERSION} NOTEQUAL 2) OR (${EIGEN2_MAJOR_VERSION} GREATER 10) OR (${EIGEN2_VERSION} VERSION_LESS ${Eigen2_FIND_VERSION})) - set(EIGEN2_VERSION_OK FALSE) - else() - set(EIGEN2_VERSION_OK TRUE) - endif() - - if(NOT EIGEN2_VERSION_OK) - - message(STATUS "Eigen2 version ${EIGEN2_VERSION} found in ${EIGEN2_INCLUDE_DIR}, " - "but at least version ${Eigen2_FIND_VERSION} is required") - endif() -endmacro() - -if (EIGEN2_INCLUDE_DIR) - - # in cache already - _eigen2_check_version() - set(EIGEN2_FOUND ${EIGEN2_VERSION_OK}) - -else () - -find_path(EIGEN2_INCLUDE_DIR NAMES Eigen/Core - PATHS - ${INCLUDE_INSTALL_DIR} - ${KDE4_INCLUDE_DIR} - PATH_SUFFIXES eigen2 - ) - -if(EIGEN2_INCLUDE_DIR) - _eigen2_check_version() -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Eigen2 DEFAULT_MSG EIGEN2_INCLUDE_DIR EIGEN2_VERSION_OK) - -mark_as_advanced(EIGEN2_INCLUDE_DIR) - -endif() - diff --git a/libs/eigen/cmake/FindEigen3.cmake b/libs/eigen/cmake/FindEigen3.cmake deleted file mode 100644 index 0b36805..0000000 --- a/libs/eigen/cmake/FindEigen3.cmake +++ /dev/null @@ -1,107 +0,0 @@ -# - Try to find Eigen3 lib -# -# This module supports requiring a minimum version, e.g. you can do -# find_package(Eigen3 3.1.2) -# to require version 3.1.2 or newer of Eigen3. -# -# Once done this will define -# -# EIGEN3_FOUND - system has eigen lib with correct version -# EIGEN3_INCLUDE_DIR - the eigen include directory -# EIGEN3_VERSION - eigen version -# -# and the following imported target: -# -# Eigen3::Eigen - The header-only Eigen library -# -# This module reads hints about search locations from -# the following environment variables: -# -# EIGEN3_ROOT -# EIGEN3_ROOT_DIR - -# Copyright (c) 2006, 2007 Montel Laurent, -# Copyright (c) 2008, 2009 Gael Guennebaud, -# Copyright (c) 2009 Benoit Jacob -# Redistribution and use is allowed according to the terms of the 2-clause BSD license. 
- -if(NOT Eigen3_FIND_VERSION) - if(NOT Eigen3_FIND_VERSION_MAJOR) - set(Eigen3_FIND_VERSION_MAJOR 2) - endif() - if(NOT Eigen3_FIND_VERSION_MINOR) - set(Eigen3_FIND_VERSION_MINOR 91) - endif() - if(NOT Eigen3_FIND_VERSION_PATCH) - set(Eigen3_FIND_VERSION_PATCH 0) - endif() - - set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") -endif() - -macro(_eigen3_check_version) - file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) - - string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") - set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") - set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") - set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") - - set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) - if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) - set(EIGEN3_VERSION_OK FALSE) - else() - set(EIGEN3_VERSION_OK TRUE) - endif() - - if(NOT EIGEN3_VERSION_OK) - - message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " - "but at least version ${Eigen3_FIND_VERSION} is required") - endif() -endmacro() - -if (EIGEN3_INCLUDE_DIR) - - # in cache already - _eigen3_check_version() - set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) - set(Eigen3_FOUND ${EIGEN3_VERSION_OK}) - -else () - - # search first if an Eigen3Config.cmake is available in the system, - # if successful this would set EIGEN3_INCLUDE_DIR and the rest of - # the script will work as usual - find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET) - - if(NOT EIGEN3_INCLUDE_DIR) - find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library - HINTS - ENV EIGEN3_ROOT - ENV EIGEN3_ROOT_DIR - PATHS - ${CMAKE_INSTALL_PREFIX}/include - ${KDE4_INCLUDE_DIR} - PATH_SUFFIXES eigen3 eigen - ) - endif() - - if(EIGEN3_INCLUDE_DIR) - _eigen3_check_version() - endif() - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) - - mark_as_advanced(EIGEN3_INCLUDE_DIR) - -endif() - -if(EIGEN3_FOUND AND NOT TARGET Eigen3::Eigen) - add_library(Eigen3::Eigen INTERFACE IMPORTED) - set_target_properties(Eigen3::Eigen PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${EIGEN3_INCLUDE_DIR}") -endif() diff --git a/libs/eigen/cmake/FindGLEW.cmake b/libs/eigen/cmake/FindGLEW.cmake deleted file mode 100644 index 9d486d5..0000000 --- a/libs/eigen/cmake/FindGLEW.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2009 Boudewijn Rempt -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. -# -# - try to find glew library and include files -# GLEW_INCLUDE_DIR, where to find GL/glew.h, etc. -# GLEW_LIBRARIES, the libraries to link against -# GLEW_FOUND, If false, do not try to use GLEW. -# Also defined, but not for general use are: -# GLEW_GLEW_LIBRARY = the full path to the glew library. 
- -if (WIN32) - - if(CYGWIN) - - find_path( GLEW_INCLUDE_DIR GL/glew.h) - - find_library( GLEW_GLEW_LIBRARY glew32 - ${OPENGL_LIBRARY_DIR} - /usr/lib/w32api - /usr/X11R6/lib - ) - - - else(CYGWIN) - - find_path( GLEW_INCLUDE_DIR GL/glew.h - $ENV{GLEW_ROOT_PATH}/include - ) - - find_library( GLEW_GLEW_LIBRARY - NAMES glew glew32 - PATHS - $ENV{GLEW_ROOT_PATH}/lib - ${OPENGL_LIBRARY_DIR} - ) - - endif(CYGWIN) - -else (WIN32) - - if (APPLE) -# These values for Apple could probably do with improvement. - find_path( GLEW_INCLUDE_DIR glew.h - /System/Library/Frameworks/GLEW.framework/Versions/A/Headers - ${OPENGL_LIBRARY_DIR} - ) - set(GLEW_GLEW_LIBRARY "-framework GLEW" CACHE STRING "GLEW library for OSX") - set(GLEW_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX") - else (APPLE) - - find_path( GLEW_INCLUDE_DIR GL/glew.h - /usr/include/GL - /usr/openwin/share/include - /usr/openwin/include - /usr/X11R6/include - /usr/include/X11 - /opt/graphics/OpenGL/include - /opt/graphics/OpenGL/contrib/libglew - ) - - find_library( GLEW_GLEW_LIBRARY GLEW - /usr/openwin/lib - /usr/X11R6/lib - ) - - endif (APPLE) - -endif (WIN32) - -set( GLEW_FOUND "NO" ) -if(GLEW_INCLUDE_DIR) - if(GLEW_GLEW_LIBRARY) - # Is -lXi and -lXmu required on all platforms that have it? - # If not, we need some way to figure out what platform we are on. - set( GLEW_LIBRARIES - ${GLEW_GLEW_LIBRARY} - ${GLEW_cocoa_LIBRARY} - ) - set( GLEW_FOUND "YES" ) - -#The following deprecated settings are for backwards compatibility with CMake1.4 - set (GLEW_LIBRARY ${GLEW_LIBRARIES}) - set (GLEW_INCLUDE_PATH ${GLEW_INCLUDE_DIR}) - - endif(GLEW_GLEW_LIBRARY) -endif(GLEW_INCLUDE_DIR) - -if(GLEW_FOUND) - if(NOT GLEW_FIND_QUIETLY) - message(STATUS "Found Glew: ${GLEW_LIBRARIES}") - endif(NOT GLEW_FIND_QUIETLY) -else(GLEW_FOUND) - if(GLEW_FIND_REQUIRED) - message(FATAL_ERROR "Could not find Glew") - endif(GLEW_FIND_REQUIRED) -endif(GLEW_FOUND) - -mark_as_advanced( - GLEW_INCLUDE_DIR - GLEW_GLEW_LIBRARY - GLEW_Xmu_LIBRARY - GLEW_Xi_LIBRARY -) diff --git a/libs/eigen/cmake/FindGSL.cmake b/libs/eigen/cmake/FindGSL.cmake deleted file mode 100644 index 8632232..0000000 --- a/libs/eigen/cmake/FindGSL.cmake +++ /dev/null @@ -1,170 +0,0 @@ -# Try to find gnu scientific library GSL -# See -# http://www.gnu.org/software/gsl/ and -# http://gnuwin32.sourceforge.net/packages/gsl.htm -# -# Once run this will define: -# -# GSL_FOUND = system has GSL lib -# -# GSL_LIBRARIES = full path to the libraries -# on Unix/Linux with additional linker flags from "gsl-config --libs" -# -# CMAKE_GSL_CXX_FLAGS = Unix compiler flags for GSL, essentially "`gsl-config --cxxflags`" -# -# GSL_INCLUDE_DIR = where to find headers -# -# GSL_LINK_DIRECTORIES = link directories, useful for rpath on Unix -# GSL_EXE_LINKER_FLAGS = rpath on Unix -# -# Felix Woelk 07/2004 -# Jan Woetzel -# -# www.mip.informatik.uni-kiel.de -# -------------------------------- - -if(WIN32) - # JW tested with gsl-1.8, Windows XP, MSVS 7.1 - set(GSL_POSSIBLE_ROOT_DIRS - ${GSL_ROOT_DIR} - $ENV{GSL_ROOT_DIR} - ${GSL_DIR} - ${GSL_HOME} - $ENV{GSL_DIR} - $ENV{GSL_HOME} - $ENV{EXTRA} - "C:/Program Files/GnuWin32" - ) - find_path(GSL_INCLUDE_DIR - NAMES gsl/gsl_cdf.h gsl/gsl_randist.h - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES include - DOC "GSL header include dir" - ) - - find_library(GSL_GSL_LIBRARY - NAMES libgsl.dll.a gsl libgsl - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL library" ) - - if(NOT GSL_GSL_LIBRARY) - find_file(GSL_GSL_LIBRARY - NAMES libgsl.dll.a - PATHS 
${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL library") - endif() - - find_library(GSL_GSLCBLAS_LIBRARY - NAMES libgslcblas.dll.a gslcblas libgslcblas - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL cblas library dir" ) - - if(NOT GSL_GSLCBLAS_LIBRARY) - find_file(GSL_GSLCBLAS_LIBRARY - NAMES libgslcblas.dll.a - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL library") - endif() - - set(GSL_LIBRARIES ${GSL_GSL_LIBRARY}) - - #message("DBG\n" - # "GSL_GSL_LIBRARY=${GSL_GSL_LIBRARY}\n" - # "GSL_GSLCBLAS_LIBRARY=${GSL_GSLCBLAS_LIBRARY}\n" - # "GSL_LIBRARIES=${GSL_LIBRARIES}") - - -else(WIN32) - - if(UNIX) - set(GSL_CONFIG_PREFER_PATH - "$ENV{GSL_DIR}/bin" - "$ENV{GSL_DIR}" - "$ENV{GSL_HOME}/bin" - "$ENV{GSL_HOME}" - CACHE STRING "preferred path to GSL (gsl-config)") - find_program(GSL_CONFIG gsl-config - ${GSL_CONFIG_PREFER_PATH} - /usr/bin/ - ) - # message("DBG GSL_CONFIG ${GSL_CONFIG}") - - if (GSL_CONFIG) - # set CXXFLAGS to be fed into CXX_FLAGS by the user: - set(GSL_CXX_FLAGS "`${GSL_CONFIG} --cflags`") - - # set INCLUDE_DIRS to prefix+include - exec_program(${GSL_CONFIG} - ARGS --prefix - OUTPUT_VARIABLE GSL_PREFIX) - set(GSL_INCLUDE_DIR ${GSL_PREFIX}/include CACHE STRING INTERNAL) - - # set link libraries and link flags - #set(GSL_LIBRARIES "`${GSL_CONFIG} --libs`") - exec_program(${GSL_CONFIG} - ARGS --libs - OUTPUT_VARIABLE GSL_LIBRARIES ) - - # extract link dirs for rpath - exec_program(${GSL_CONFIG} - ARGS --libs - OUTPUT_VARIABLE GSL_CONFIG_LIBS ) - - # extract version - exec_program(${GSL_CONFIG} - ARGS --version - OUTPUT_VARIABLE GSL_FULL_VERSION ) - - # split version as major/minor - string(REGEX MATCH "(.)\\..*" GSL_VERSION_MAJOR_ "${GSL_FULL_VERSION}") - set(GSL_VERSION_MAJOR ${CMAKE_MATCH_1}) - string(REGEX MATCH ".\\.(.*)" GSL_VERSION_MINOR_ "${GSL_FULL_VERSION}") - set(GSL_VERSION_MINOR ${CMAKE_MATCH_1}) - - # split off the link dirs (for rpath) - # use regular expression to match wildcard equivalent "-L*" - # with is a space or a semicolon - string(REGEX MATCHALL "[-][L]([^ ;])+" - GSL_LINK_DIRECTORIES_WITH_PREFIX - "${GSL_CONFIG_LIBS}" ) - # message("DBG GSL_LINK_DIRECTORIES_WITH_PREFIX=${GSL_LINK_DIRECTORIES_WITH_PREFIX}") - - # remove prefix -L because we need the pure directory for LINK_DIRECTORIES - - if (GSL_LINK_DIRECTORIES_WITH_PREFIX) - string(REGEX REPLACE "[-][L]" "" GSL_LINK_DIRECTORIES ${GSL_LINK_DIRECTORIES_WITH_PREFIX} ) - endif (GSL_LINK_DIRECTORIES_WITH_PREFIX) - set(GSL_EXE_LINKER_FLAGS "-Wl,-rpath,${GSL_LINK_DIRECTORIES}" CACHE STRING INTERNAL) - # message("DBG GSL_LINK_DIRECTORIES=${GSL_LINK_DIRECTORIES}") - # message("DBG GSL_EXE_LINKER_FLAGS=${GSL_EXE_LINKER_FLAGS}") - - # add_definitions("-DHAVE_GSL") - # set(GSL_DEFINITIONS "-DHAVE_GSL") - mark_as_advanced( - GSL_CXX_FLAGS - GSL_INCLUDE_DIR - GSL_LIBRARIES - GSL_LINK_DIRECTORIES - GSL_DEFINITIONS - ) - message(STATUS "Using GSL from ${GSL_PREFIX}") - - else(GSL_CONFIG) - message("FindGSL.cmake: gsl-config not found. Please set it manually. 
GSL_CONFIG=${GSL_CONFIG}") - endif(GSL_CONFIG) - - endif(UNIX) -endif(WIN32) - - -if(GSL_LIBRARIES) - if(GSL_INCLUDE_DIR OR GSL_CXX_FLAGS) - - set(GSL_FOUND 1) - - endif(GSL_INCLUDE_DIR OR GSL_CXX_FLAGS) -endif(GSL_LIBRARIES) diff --git a/libs/eigen/cmake/FindLAPACK.cmake b/libs/eigen/cmake/FindLAPACK.cmake deleted file mode 100644 index 3fd7388..0000000 --- a/libs/eigen/cmake/FindLAPACK.cmake +++ /dev/null @@ -1,274 +0,0 @@ -# Find LAPACK library -# -# This module finds an installed library that implements the LAPACK -# linear-algebra interface (see http://www.netlib.org/lapack/). -# The approach follows mostly that taken for the autoconf macro file, acx_lapack.m4 -# (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). -# -# This module sets the following variables: -# LAPACK_FOUND - set to true if a library implementing the LAPACK interface -# is found -# LAPACK_INCLUDE_DIR - Directories containing the LAPACK header files -# LAPACK_DEFINITIONS - Compilation options to use LAPACK -# LAPACK_LINKER_FLAGS - Linker flags to use LAPACK (excluding -l -# and -L). -# LAPACK_LIBRARIES_DIR - Directories containing the LAPACK libraries. -# May be null if LAPACK_LIBRARIES contains libraries name using full path. -# LAPACK_LIBRARIES - List of libraries to link against LAPACK interface. -# May be null if the compiler supports auto-link (e.g. VC++). -# LAPACK_USE_FILE - The name of the cmake module to include to compile -# applications or libraries using LAPACK. -# -# This module was modified by CGAL team: -# - find libraries for a C++ compiler, instead of Fortran -# - added LAPACK_INCLUDE_DIR, LAPACK_DEFINITIONS and LAPACK_LIBRARIES_DIR -# - removed LAPACK95_LIBRARIES - - -include(CheckFunctionExists) -include(CMakeFindDependencyMacro) - -# This macro checks for the existence of the combination of fortran libraries -# given by _list. If the combination is found, this macro checks (using the -# check_function_exists macro) whether can link against that library -# combination using the name of a routine given by _name using the linker -# flags given by _flags. If the combination of libraries is found and passes -# the link test, LIBRARIES is set to the list of complete library paths that -# have been found and DEFINITIONS to the required definitions. -# Otherwise, LIBRARIES is set to FALSE. -# N.B. _prefix is the prefix applied to the names of all cached variables that -# are generated internally and marked advanced by this macro. 
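# The probing described above reduces to two stock CMake steps: resolve each
# candidate name to a full path with find_library(), then try to link a call
# to the routine via check_function_exists(). A minimal sketch of the same
# idea (MY_LAPACK_LIBRARY and MY_LAPACK_cheev_WORKS are hypothetical names):
#
#   include(CheckFunctionExists)
#   find_library(MY_LAPACK_LIBRARY NAMES lapack)
#   if(MY_LAPACK_LIBRARY)
#     set(CMAKE_REQUIRED_LIBRARIES ${MY_LAPACK_LIBRARY})
#     # Fortran entry points are usually reachable from C with a trailing underscore.
#     check_function_exists(cheev_ MY_LAPACK_cheev_WORKS)
#     set(CMAKE_REQUIRED_LIBRARIES "")
#   endif()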
-macro(check_lapack_libraries DEFINITIONS LIBRARIES _prefix _name _flags _list _blas _path) - #message("DEBUG: check_lapack_libraries(${_list} in ${_path} with ${_blas})") - - # Check for the existence of the libraries given by _list - set(_libraries_found TRUE) - set(_libraries_work FALSE) - set(${DEFINITIONS} "") - set(${LIBRARIES} "") - set(_combined_name) - foreach(_library ${_list}) - set(_combined_name ${_combined_name}_${_library}) - - if(_libraries_found) - # search first in ${_path} - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ${_path} NO_DEFAULT_PATH - ) - # if not found, search in environment variables and system - if ( WIN32 ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ENV LIB - ) - elseif ( APPLE ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH - ) - else () - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH - ) - endif() - mark_as_advanced(${_prefix}_${_library}_LIBRARY) - set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_found ${${_prefix}_${_library}_LIBRARY}) - endif() - endforeach() - if(_libraries_found) - set(_libraries_found ${${LIBRARIES}}) - endif() - - # Test this combination of libraries with the Fortran/f2c interface. - # We test the Fortran interface first as it is well standardized. - if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "-D${_prefix}_USE_F2C") - set(${LIBRARIES} ${_libraries_found}) - # Some C++ linkers require the f2c library to link with Fortran libraries. - # I do not know which ones, thus I just add the f2c library if it is available. - find_dependency( F2C QUIET ) - if ( F2C_FOUND ) - set(${DEFINITIONS} ${${DEFINITIONS}} ${F2C_DEFINITIONS}) - set(${LIBRARIES} ${${LIBRARIES}} ${F2C_LIBRARIES}) - endif() - set(CMAKE_REQUIRED_DEFINITIONS ${${DEFINITIONS}}) - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) - #message("DEBUG: CMAKE_REQUIRED_DEFINITIONS = ${CMAKE_REQUIRED_DEFINITIONS}") - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - # Check if function exists with f2c calling convention (ie a trailing underscore) - check_function_exists(${_name}_ ${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(CMAKE_REQUIRED_DEFINITIONS} "") - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(_libraries_work ${${_prefix}_${_name}_${_combined_name}_f2c_WORKS}) - endif() - - # If not found, test this combination of libraries with a C interface. - # A few implementations (ie ACML) provide a C interface. Unfortunately, there is no standard. 
- if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} ${_libraries_found}) - set(CMAKE_REQUIRED_DEFINITIONS "") - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - check_function_exists(${_name} ${_prefix}_${_name}${_combined_name}_WORKS) - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}_${_name}${_combined_name}_WORKS}) - endif() - - # on failure - if(NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} FALSE) - endif() - #message("DEBUG: ${DEFINITIONS} = ${${DEFINITIONS}}") - #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") -endmacro() - - -# -# main -# - -# LAPACK requires BLAS -if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) - find_dependency(BLAS) -else() - find_dependency(BLAS REQUIRED) -endif() - -if (NOT BLAS_FOUND) - - message(STATUS "LAPACK requires BLAS.") - set(LAPACK_FOUND FALSE) - -# Is it already configured? -elseif (LAPACK_LIBRARIES_DIR OR LAPACK_LIBRARIES) - - set(LAPACK_FOUND TRUE) - -else() - - # reset variables - set( LAPACK_INCLUDE_DIR "" ) - set( LAPACK_DEFINITIONS "" ) - set( LAPACK_LINKER_FLAGS "" ) # unused (yet) - set( LAPACK_LIBRARIES "" ) - set( LAPACK_LIBRARIES_DIR "" ) - - # - # If Unix, search for LAPACK function in possible libraries - # - - #intel mkl lapack? - if(NOT LAPACK_LIBRARIES) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "mkl_lapack" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - #acml lapack? - if(NOT LAPACK_LIBRARIES) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "acml" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - # Apple LAPACK library? - if(NOT LAPACK_LIBRARIES) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "Accelerate" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - if ( NOT LAPACK_LIBRARIES ) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "vecLib" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif () - - # Generic LAPACK library? - # This configuration *must* be the last try as this library is notably slow. - if ( NOT LAPACK_LIBRARIES ) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "lapack" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - if(LAPACK_LIBRARIES_DIR OR LAPACK_LIBRARIES) - set(LAPACK_FOUND TRUE) - else() - set(LAPACK_FOUND FALSE) - endif() - - if(NOT LAPACK_FIND_QUIETLY) - if(LAPACK_FOUND) - message(STATUS "A library with LAPACK API found.") - else() - if(LAPACK_FIND_REQUIRED) - message(FATAL_ERROR "A required library with LAPACK API not found. Please specify library location.") - else() - message(STATUS "A library with LAPACK API not found. 
Please specify library location.") - endif() - endif() - endif() - - # Add variables to cache - set( LAPACK_INCLUDE_DIR "${LAPACK_INCLUDE_DIR}" - CACHE PATH "Directories containing the LAPACK header files" FORCE ) - set( LAPACK_DEFINITIONS "${LAPACK_DEFINITIONS}" - CACHE STRING "Compilation options to use LAPACK" FORCE ) - set( LAPACK_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}" - CACHE STRING "Linker flags to use LAPACK" FORCE ) - set( LAPACK_LIBRARIES "${LAPACK_LIBRARIES}" - CACHE FILEPATH "LAPACK libraries name" FORCE ) - set( LAPACK_LIBRARIES_DIR "${LAPACK_LIBRARIES_DIR}" - CACHE PATH "Directories containing the LAPACK libraries" FORCE ) - - #message("DEBUG: LAPACK_INCLUDE_DIR = ${LAPACK_INCLUDE_DIR}") - #message("DEBUG: LAPACK_DEFINITIONS = ${LAPACK_DEFINITIONS}") - #message("DEBUG: LAPACK_LINKER_FLAGS = ${LAPACK_LINKER_FLAGS}") - #message("DEBUG: LAPACK_LIBRARIES = ${LAPACK_LIBRARIES}") - #message("DEBUG: LAPACK_LIBRARIES_DIR = ${LAPACK_LIBRARIES_DIR}") - #message("DEBUG: LAPACK_FOUND = ${LAPACK_FOUND}") - -endif() diff --git a/libs/eigen/cmake/UseEigen3.cmake b/libs/eigen/cmake/UseEigen3.cmake deleted file mode 100644 index a38bac8..0000000 --- a/libs/eigen/cmake/UseEigen3.cmake +++ /dev/null @@ -1,6 +0,0 @@ -# -*- cmake -*- -# -# UseEigen3.cmake - -add_definitions ( ${EIGEN3_DEFINITIONS} ) -include_directories ( ${EIGEN3_INCLUDE_DIRS} ) diff --git a/libs/eigen/debug/gdb/printers.py b/libs/eigen/debug/gdb/printers.py index 24961d1..2c3fccf 100644 --- a/libs/eigen/debug/gdb/printers.py +++ b/libs/eigen/debug/gdb/printers.py @@ -22,29 +22,29 @@ # import sys # sys.path.insert(0, '/path/to/eigen/printer/directory') # from printers import register_eigen_printers -# register_eigen_printers (None) +# register_eigen_printers(None) # end import gdb import re -import itertools from bisect import bisect_left + # Basic row/column iteration code for use with Sparse and Dense matrices class _MatrixEntryIterator(object): - def __init__ (self, rows, cols, rowMajor): + def __init__(self, rows, cols, row_major): self.rows = rows self.cols = cols self.currentRow = 0 self.currentCol = 0 - self.rowMajor = rowMajor + self.rowMajor = row_major - def __iter__ (self): + def __iter__(self): return self def next(self): - return self.__next__() # Python 2.x compatibility + return self.__next__() # Python 2.x compatibility def __next__(self): row = self.currentRow @@ -53,54 +53,55 @@ class _MatrixEntryIterator(object): if self.currentCol >= self.cols: raise StopIteration - self.currentRow = self.currentRow + 1 + self.currentRow += 1 if self.currentRow >= self.rows: self.currentRow = 0 - self.currentCol = self.currentCol + 1 + self.currentCol += 1 else: if self.currentRow >= self.rows: raise StopIteration - self.currentCol = self.currentCol + 1 + self.currentCol += 1 if self.currentCol >= self.cols: self.currentCol = 0 - self.currentRow = self.currentRow + 1 + self.currentRow += 1 + + return row, col - return (row, col) class EigenMatrixPrinter: - "Print Eigen Matrix or Array of some kind" + """Print Eigen Matrix or Array of some kind""" def __init__(self, variety, val): - "Extract all the necessary information" + """Extract all the necessary information""" # Save the variety (presumably "Matrix" or "Array") for later usage self.variety = variety # The gdb extension does not support value template arguments - need to extract them by hand - type = val.type - if type.code == gdb.TYPE_CODE_REF: - type = type.target() - self.type = type.unqualified().strip_typedefs() + typeinfo = val.type + if typeinfo.code == 
gdb.TYPE_CODE_REF: + typeinfo = typeinfo.target() + self.type = typeinfo.unqualified().strip_typedefs() tag = self.type.tag - regex = re.compile('\<.*\>') + regex = re.compile('<.*>') m = regex.findall(tag)[0][1:-1] template_params = m.split(',') template_params = [x.replace(" ", "") for x in template_params] - if template_params[1] == '-0x00000000000000001' or template_params[1] == '-0x000000001' or template_params[1] == '-1': + if template_params[1] in ['-0x00000000000000001', '-0x000000001', '-1']: self.rows = val['m_storage']['m_rows'] else: self.rows = int(template_params[1]) - if template_params[2] == '-0x00000000000000001' or template_params[2] == '-0x000000001' or template_params[2] == '-1': + if template_params[2] in ['-0x00000000000000001', '-0x000000001', '-1']: self.cols = val['m_storage']['m_cols'] else: self.cols = int(template_params[2]) - self.options = 0 # default value + self.options = 0 # default value if len(template_params) > 3: - self.options = template_params[3]; + self.options = template_params[3] self.rowMajor = (int(self.options) & 0x1) @@ -114,50 +115,51 @@ class EigenMatrixPrinter: self.data = self.data['array'] self.data = self.data.cast(self.innerType.pointer()) - class _iterator(_MatrixEntryIterator): - def __init__ (self, rows, cols, dataPtr, rowMajor): - super(EigenMatrixPrinter._iterator, self).__init__(rows, cols, rowMajor) + class _Iterator(_MatrixEntryIterator): + def __init__(self, rows, cols, data_ptr, row_major): + super(EigenMatrixPrinter._Iterator, self).__init__(rows, cols, row_major) - self.dataPtr = dataPtr + self.dataPtr = data_ptr def __next__(self): - - row, col = super(EigenMatrixPrinter._iterator, self).__next__() + row, col = super(EigenMatrixPrinter._Iterator, self).__next__() item = self.dataPtr.dereference() - self.dataPtr = self.dataPtr + 1 - if (self.cols == 1): #if it's a column vector - return ('[%d]' % (row,), item) - elif (self.rows == 1): #if it's a row vector - return ('[%d]' % (col,), item) - return ('[%d,%d]' % (row, col), item) + self.dataPtr += 1 + if self.cols == 1: # if it's a column vector + return '[%d]' % (row,), item + elif self.rows == 1: # if it's a row vector + return '[%d]' % (col,), item + return '[%d,%d]' % (row, col), item def children(self): - - return self._iterator(self.rows, self.cols, self.data, self.rowMajor) + return self._Iterator(self.rows, self.cols, self.data, self.rowMajor) def to_string(self): - return "Eigen::%s<%s,%d,%d,%s> (data ptr: %s)" % (self.variety, self.innerType, self.rows, self.cols, "RowMajor" if self.rowMajor else "ColMajor", self.data) + return "Eigen::%s<%s,%d,%d,%s> (data ptr: %s)" % ( + self.variety, self.innerType, self.rows, self.cols, + "RowMajor" if self.rowMajor else "ColMajor", self.data) + class EigenSparseMatrixPrinter: - "Print an Eigen SparseMatrix" + """Print an Eigen SparseMatrix""" def __init__(self, val): - "Extract all the necessary information" + """Extract all the necessary information""" - type = val.type - if type.code == gdb.TYPE_CODE_REF: - type = type.target() - self.type = type.unqualified().strip_typedefs() + typeinfo = val.type + if typeinfo.code == gdb.TYPE_CODE_REF: + typeinfo = typeinfo.target() + self.type = typeinfo.unqualified().strip_typedefs() tag = self.type.tag - regex = re.compile('\<.*\>') + regex = re.compile('<.*>') m = regex.findall(tag)[0][1:-1] template_params = m.split(',') template_params = [x.replace(" ", "") for x in template_params] self.options = 0 if len(template_params) > 1: - self.options = template_params[1]; + self.options = 
template_params[1] self.rowMajor = (int(self.options) & 0x1) @@ -168,22 +170,23 @@ class EigenSparseMatrixPrinter: self.data = self.val['m_data'] self.data = self.data.cast(self.innerType.pointer()) - class _iterator(_MatrixEntryIterator): - def __init__ (self, rows, cols, val, rowMajor): - super(EigenSparseMatrixPrinter._iterator, self).__init__(rows, cols, rowMajor) + class _Iterator(_MatrixEntryIterator): + def __init__(self, rows, cols, val, row_major): + super(EigenSparseMatrixPrinter._Iterator, self).__init__(rows, cols, row_major) self.val = val def __next__(self): - - row, col = super(EigenSparseMatrixPrinter._iterator, self).__next__() + row, col = super(EigenSparseMatrixPrinter._Iterator, self).__next__() # repeat calculations from SparseMatrix.h: outer = row if self.rowMajor else col inner = col if self.rowMajor else row start = self.val['m_outerIndex'][outer] - end = ((start + self.val['m_innerNonZeros'][outer]) if self.val['m_innerNonZeros'] else - self.val['m_outerIndex'][outer+1]) + end = ( + (start + self.val['m_innerNonZeros'][outer]) + if self.val['m_innerNonZeros'] else self.val['m_outerIndex'][outer+1] + ) # and from CompressedStorage.h: data = self.val['m_data'] @@ -196,20 +199,19 @@ class EigenSparseMatrixPrinter: indices = [data['m_indices'][x] for x in range(int(start), int(end)-1)] # find the index with binary search idx = int(start) + bisect_left(indices, inner) - if ((idx < end) and (data['m_indices'][idx] == inner)): + if idx < end and data['m_indices'][idx] == inner: item = data['m_values'][idx] else: item = 0 - return ('[%d,%d]' % (row, col), item) + return '[%d,%d]' % (row, col), item def children(self): if self.data: - return self._iterator(self.rows(), self.cols(), self.val, self.rowMajor) + return self._Iterator(self.rows(), self.cols(), self.val, self.rowMajor) return iter([]) # empty matrix, for now - def rows(self): return self.val['m_outerSize'] if self.rowMajor else self.val['m_innerSize'] @@ -222,22 +224,23 @@ class EigenSparseMatrixPrinter: status = ("not compressed" if self.val['m_innerNonZeros'] else "compressed") else: status = "empty" - dimensions = "%d x %d" % (self.rows(), self.cols()) - layout = "row" if self.rowMajor else "column" + dimensions = "%d x %d" % (self.rows(), self.cols()) + layout = "row" if self.rowMajor else "column" return "Eigen::SparseMatrix<%s>, %s, %s major, %s" % ( - self.innerType, dimensions, layout, status ) + self.innerType, dimensions, layout, status) + class EigenQuaternionPrinter: - "Print an Eigen Quaternion" + """Print an Eigen Quaternion""" def __init__(self, val): - "Extract all the necessary information" + """Extract all the necessary information""" # The gdb extension does not support value template arguments - need to extract them by hand - type = val.type - if type.code == gdb.TYPE_CODE_REF: - type = type.target() - self.type = type.unqualified().strip_typedefs() + typeinfo = val.type + if typeinfo.code == gdb.TYPE_CODE_REF: + typeinfo = typeinfo.target() + self.type = typeinfo.unqualified().strip_typedefs() self.innerType = self.type.template_argument(0) self.val = val @@ -245,13 +248,13 @@ class EigenQuaternionPrinter: self.data = self.val['m_coeffs']['m_storage']['m_data']['array'] self.data = self.data.cast(self.innerType.pointer()) - class _iterator: - def __init__ (self, dataPtr): - self.dataPtr = dataPtr + class _Iterator: + def __init__(self, data_ptr): + self.dataPtr = data_ptr self.currentElement = 0 self.elementNames = ['x', 'y', 'z', 'w'] - def __iter__ (self): + def __iter__(self): return self 
def next(self): @@ -260,47 +263,67 @@ class EigenQuaternionPrinter: def __next__(self): element = self.currentElement - if self.currentElement >= 4: #there are 4 elements in a quanternion + if self.currentElement >= 4: # there are 4 elements in a quaternion raise StopIteration - self.currentElement = self.currentElement + 1 + self.currentElement += 1 item = self.dataPtr.dereference() - self.dataPtr = self.dataPtr + 1 - return ('[%s]' % (self.elementNames[element],), item) + self.dataPtr += 1 + return '[%s]' % (self.elementNames[element],), item def children(self): - - return self._iterator(self.data) + return self._Iterator(self.data) def to_string(self): return "Eigen::Quaternion<%s> (data ptr: %s)" % (self.innerType, self.data) -def build_eigen_dictionary (): + +def cast_eigen_block_to_matrix(val): + # Get the type of the variable (and convert to a string) + # Example: 'const Eigen::Block, -1, -1, false> const, -1, -1, false>' + val_type = str(val.type) + + # Extract the Eigen::Matrix type from the Block: + # From the previous example: Eigen::Matrix + begin = val_type.find('Eigen::Matrix<') + end = val_type.find('>', begin) + 1 + + # Convert the Eigen::Block to an Eigen::Matrix + return val.cast(gdb.lookup_type(val_type[begin:end])) + + +def build_eigen_dictionary(): pretty_printers_dict[re.compile('^Eigen::Quaternion<.*>$')] = lambda val: EigenQuaternionPrinter(val) pretty_printers_dict[re.compile('^Eigen::Matrix<.*>$')] = lambda val: EigenMatrixPrinter("Matrix", val) + pretty_printers_dict[re.compile('^Eigen::Block<.*>$')] =\ + lambda val: EigenMatrixPrinter("Matrix", cast_eigen_block_to_matrix(val)) + pretty_printers_dict[re.compile('^Eigen::VectorBlock<.*>$')] =\ + lambda val: EigenMatrixPrinter("Matrix", cast_eigen_block_to_matrix(val)) pretty_printers_dict[re.compile('^Eigen::SparseMatrix<.*>$')] = lambda val: EigenSparseMatrixPrinter(val) - pretty_printers_dict[re.compile('^Eigen::Array<.*>$')] = lambda val: EigenMatrixPrinter("Array", val) + pretty_printers_dict[re.compile('^Eigen::Array<.*>$')] = lambda val: EigenMatrixPrinter("Array", val) + def register_eigen_printers(obj): - "Register eigen pretty-printers with objfile Obj" + """Register eigen pretty-printers with objfile Obj""" - if obj == None: + if obj is None: obj = gdb obj.pretty_printers.append(lookup_function) + def lookup_function(val): - "Look-up and return a pretty-printer that can print va." + """Look-up and return a pretty-printer that can print val.""" - type = val.type + typeinfo = val.type - if type.code == gdb.TYPE_CODE_REF: - type = type.target() + if typeinfo.code == gdb.TYPE_CODE_REF: + typeinfo = typeinfo.target() - type = type.unqualified().strip_typedefs() + typeinfo = typeinfo.unqualified().strip_typedefs() - typename = type.tag - if typename == None: + typename = typeinfo.tag + if typename is None: return None for function in pretty_printers_dict: @@ -309,6 +332,7 @@ def lookup_function(val): return None + pretty_printers_dict = {} -build_eigen_dictionary () +build_eigen_dictionary() diff --git a/libs/eigen/debug/lldb/eigenlldb.py b/libs/eigen/debug/lldb/eigenlldb.py new file mode 100644 index 0000000..d9b5d06 --- /dev/null +++ b/libs/eigen/debug/lldb/eigenlldb.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- +# This file is part of Eigen, a lightweight C++ template library +# for linear algebra. +# +# Copyright (C) 2021 Huang, Zhaoquan +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Pretty printers for Eigen::Matrix to use with LLDB debugger +# +# Usage: +# 1. Add the following line (change it according to the path to this file) +# to the file ~/.lldbinit (create one if it doesn't exist): +# `command script import /path/to/eigenlldb.py` +# 2. Inspect the variables in LLDB command line +# `frame variable` + +import lldb +from typing import List +import bisect + + +def __lldb_init_module(debugger, internal_dict): + debugger.HandleCommand("type synthetic add -x Eigen::Matrix<.*> --python-class eigenlldb.EigenMatrixChildProvider") + debugger.HandleCommand( + "type synthetic add -x Eigen::SparseMatrix<.*> --python-class eigenlldb.EigenSparseMatrixChildProvider") + + +class EigenMatrixChildProvider: + _valobj: lldb.SBValue + _scalar_type: lldb.SBType + _scalar_size: int + _rows_compile_time: int + _cols_compile_time: int + _row_major: bool + _fixed_storage: bool + + def __init__(self, valobj, internal_dict): + self._valobj = valobj + valtype = valobj.GetType().GetCanonicalType() + + scalar_type = valtype.GetTemplateArgumentType(0) + if not scalar_type.IsValid(): + # In the case that scalar_type is invalid on LLDB 9.0 on Windows with CLion + storage = valobj.GetChildMemberWithName("m_storage") + data = storage.GetChildMemberWithName("m_data") + data_type = data.GetType() + if data_type.IsPointerType(): + scalar_type = data.GetType().GetPointeeType() + else: + scalar_type = data.GetChildMemberWithName("array").GetType().GetArrayElementType() + self._scalar_type = scalar_type + self._scalar_size = self._scalar_type.GetByteSize() + + name = valtype.GetName() + template_begin = name.find("<") + template_end = name.find(">") + template_args = name[(template_begin + 1):template_end].split(",") + self._rows_compile_time = int(template_args[1]) + self._cols_compile_time = int(template_args[2]) + self._row_major = (int(template_args[3]) & 1) != 0 + + max_rows = int(template_args[4]) + max_cols = int(template_args[5]) + self._fixed_storage = (max_rows != -1 and max_cols != -1) + + def num_children(self): + return self._cols() * self._rows() + + def get_child_index(self, name): + pass + + def get_child_at_index(self, index): + storage = self._valobj.GetChildMemberWithName("m_storage") + data = storage.GetChildMemberWithName("m_data") + offset = self._scalar_size * index + + if self._row_major: + row = index // self._cols() + col = index % self._cols() + else: + row = index % self._rows() + col = index // self._rows() + if self._fixed_storage: + data = data.GetChildMemberWithName("array") + if self._cols() == 1: + name = '[{}]'.format(row) + elif self._rows() == 1: + name = '[{}]'.format(col) + else: + name = '[{},{}]'.format(row, col) + return data.CreateChildAtOffset( + name, offset, self._scalar_type + ) + + def _cols(self): + if self._cols_compile_time == -1: + storage = self._valobj.GetChildMemberWithName("m_storage") + cols = storage.GetChildMemberWithName("m_cols") + return cols.GetValueAsUnsigned() + else: + return self._cols_compile_time + + def _rows(self): + if self._rows_compile_time == -1: + storage = self._valobj.GetChildMemberWithName("m_storage") + rows = storage.GetChildMemberWithName("m_rows") + return rows.GetValueAsUnsigned() + else: + return self._rows_compile_time + + +class EigenSparseMatrixChildProvider: + _valobj: lldb.SBValue + _scalar_type: lldb.SBType + _scalar_size: int + _index_type: lldb.SBType + _index_size: int + _row_major: bool + + 
_outer_size: int + _nnz: int + _values: lldb.SBValue + _inner_indices: lldb.SBValue + _outer_starts: lldb.SBValue + _inner_nnzs: lldb.SBValue + _compressed: bool + + # Index of the first synthetic child under each outer index + _child_indices: List[int] + + def __init__(self, valobj, internal_dict): + self._valobj = valobj + valtype = valobj.GetType().GetCanonicalType() + scalar_type = valtype.GetTemplateArgumentType(0) + if not scalar_type.IsValid(): + # In the case that scalar_type is invalid on LLDB 9.0 on Windows with CLion + data = valobj.GetChildMemberWithName("m_data") + values = data.GetChildMemberWithName("m_values") + scalar_type = values.GetType().GetPointeeType() + self._scalar_type = scalar_type + self._scalar_size = scalar_type.GetByteSize() + + index_type = valtype.GetTemplateArgumentType(2) + if not index_type.IsValid(): + # In the case that scalar_type is invalid on LLDB 9.0 on Windows with CLion + outer_starts = valobj.GetChildMemberWithName("m_outerIndex") + index_type = outer_starts.GetType().GetPointeeType() + self._index_type = index_type + self._index_size = index_type.GetByteSize() + + name = valtype.GetName() + template_begin = name.find("<") + template_end = name.find(">") + template_args = name[(template_begin + 1):template_end].split(",") + self._row_major = (int(template_args[1]) & 1) != 0 + + def num_children(self): + return self._nnz + 2 + + def get_child_index(self, name): + pass + + def get_child_at_index(self, index): + if index == 0: + name = "rows" if self._row_major else "cols" + return self._valobj.GetChildMemberWithName("m_outerSize") \ + .CreateChildAtOffset(name, 0, self._index_type) + elif index == 1: + name = "cols" if self._row_major else "rows" + return self._valobj.GetChildMemberWithName("m_innerSize") \ + .CreateChildAtOffset(name, 0, self._index_type) + else: + index = index - 2 + outer_index = bisect.bisect_right(self._child_indices, index) - 1 + total_nnzs = self._child_indices[outer_index] + if self._compressed: + item_index = index + inner_index = self._inner_indices \ + .CreateChildAtOffset("", item_index * self._index_size, self._index_type) \ + .GetValueAsUnsigned() + return self._values \ + .CreateChildAtOffset(self._child_name(outer_index, inner_index), + item_index * self._scalar_size, + self._scalar_type) + else: + index_begin = self._outer_starts \ + .CreateChildAtOffset("", outer_index * self._index_size, self._index_type) \ + .GetValueAsUnsigned() + item_index = index - total_nnzs + index_begin + inner_index = self._inner_indices \ + .CreateChildAtOffset("", item_index * self._index_size, self._index_type) \ + .GetValueAsUnsigned() + return self._values \ + .CreateChildAtOffset(self._child_name(outer_index, inner_index), + item_index * self._scalar_size, + self._scalar_type) + + def update(self): + valobj = self._valobj + self._outer_size = valobj.GetChildMemberWithName("m_outerSize").GetValueAsUnsigned() + data = valobj.GetChildMemberWithName("m_data") + self._values = data.GetChildMemberWithName("m_values") + self._inner_indices = data.GetChildMemberWithName("m_indices") + self._outer_starts = valobj.GetChildMemberWithName("m_outerIndex") + self._inner_nnzs = valobj.GetChildMemberWithName("m_innerNonZeros") + + self._compressed = self._inner_nnzs.GetValueAsUnsigned() == 0 + + total_nnzs = 0 + child_indices = [0] + for outer_index in range(self._outer_size): + if self._compressed: + index_end = self._outer_starts \ + .CreateChildAtOffset("", (outer_index + 1) * self._index_size, self._index_type) \ + .GetValueAsUnsigned() + 
                total_nnzs = index_end
+                child_indices.append(total_nnzs)
+            else:
+                nnzs = self._inner_nnzs \
+                    .CreateChildAtOffset("", outer_index * self._index_size, self._index_type) \
+                    .GetValueAsUnsigned()
+                total_nnzs = total_nnzs + nnzs
+                child_indices.append(total_nnzs)
+        self._child_indices = child_indices
+        self._nnz = total_nnzs
+
+    def _child_name(self, outer_index, inner_index):
+        if self._row_major:
+            return "[{0},{1}]".format(outer_index, inner_index)
+        else:
+            return "[{1},{0}]".format(outer_index, inner_index)
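The `_child_indices` table built in `update()` above is a running total of nonzeros per outer vector (with a leading 0), so `get_child_at_index()` can map a flat child index to its outer vector with one binary search. A self-contained model of that lookup; the nonzero counts are hypothetical, and note that in the provider children 0 and 1 expose the dimensions, so it subtracts 2 before this step:

    from bisect import bisect_right

    # Running totals for three outer vectors holding 2, 0 and 3 nonzeros.
    child_indices = [0, 2, 2, 5]

    def locate(flat_index):
        # Map a flat nonzero index to (outer vector, position within it).
        outer = bisect_right(child_indices, flat_index) - 1
        return outer, flat_index - child_indices[outer]

    assert locate(0) == (0, 0)  # first nonzero of outer vector 0
    assert locate(2) == (2, 0)  # vector 1 is empty, so child 2 opens vector 2
    assert locate(4) == (2, 2)  # last stored nonzero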
diff --git a/libs/eigen/demos/opengl/gpuhelper.h b/libs/eigen/demos/opengl/gpuhelper.h
index 9ff98e9..880e9a5 100644
--- a/libs/eigen/demos/opengl/gpuhelper.h
+++ b/libs/eigen/demos/opengl/gpuhelper.h
@@ -34,21 +34,21 @@ class GpuHelper
     Essentially, this helper function automatically calls glMatrixMode(matrixTarget) if required
     and does a proper call to the right glMultMatrix*() function according to the scalar type
     and storage order.
-    \warning glMatrixMode() must never be called directly. If your're unsure, use forceMatrixMode().
+    \warning glMatrixMode() must never be called directly. If you are unsure, use forceMatrixMode().
     \sa Matrix, loadMatrix(), forceMatrixMode()
   */
-    template<typename Scalar, int _Flags>
-    void multMatrix(const Matrix<Scalar,4,4, _Flags, 4,4>& mat, GLenum matrixTarget);
+    template<typename Scalar, int Flags_>
+    void multMatrix(const Matrix<Scalar,4,4, Flags_, 4,4>& mat, GLenum matrixTarget);
 
     /** Load the matrix \a mat to the OpenGL matrix \a matrixTarget.
         Essentially, this helper function automatically calls glMatrixMode(matrixTarget) if required
         and does a proper call to the right glLoadMatrix*() or glLoadIdentity() function according to the scalar type
         and storage order.
-        \warning glMatrixMode() must never be called directly. If your're unsure, use forceMatrixMode().
+        \warning glMatrixMode() must never be called directly. If you are unsure, use forceMatrixMode().
         \sa Matrix, multMatrix(), forceMatrixMode()
     */
-    template<typename Scalar, int _Flags>
-    void loadMatrix(const Eigen::Matrix<Scalar,4,4, _Flags, 4,4>& mat, GLenum matrixTarget);
+    template<typename Scalar, int Flags_>
+    void loadMatrix(const Eigen::Matrix<Scalar,4,4, Flags_, 4,4>& mat, GLenum matrixTarget);
 
     template<typename Scalar, typename Derived>
     void loadMatrix(
@@ -66,8 +66,8 @@ class GpuHelper
 
     /** Push the OpenGL matrix \a matrixTarget and load \a mat.
     */
-    template<typename Scalar, int _Flags>
-    inline void pushMatrix(const Matrix<Scalar,4,4, _Flags, 4,4>& mat, GLenum matrixTarget);
+    template<typename Scalar, int Flags_>
+    inline void pushMatrix(const Matrix<Scalar,4,4, Flags_, 4,4>& mat, GLenum matrixTarget);
 
     template<typename Scalar, typename Derived>
     void pushMatrix(
@@ -113,22 +113,22 @@ extern GpuHelper gpu;
 
 /** \internal
 */
-template<bool RowMajor, int _Flags> struct GlMatrixHelper;
+template<bool RowMajor, int Flags_> struct GlMatrixHelper;
 
-template<int _Flags> struct GlMatrixHelper<false,_Flags>
+template<int Flags_> struct GlMatrixHelper<false,Flags_>
 {
-    static void loadMatrix(const Matrix<float, 4,4, _Flags, 4,4>&  mat) { glLoadMatrixf(mat.data()); }
-    static void loadMatrix(const Matrix<double,4,4, _Flags, 4,4>& mat) { glLoadMatrixd(mat.data()); }
-    static void multMatrix(const Matrix<float, 4,4, _Flags, 4,4>&  mat) { glMultMatrixf(mat.data()); }
-    static void multMatrix(const Matrix<double,4,4, _Flags, 4,4>& mat) { glMultMatrixd(mat.data()); }
+    static void loadMatrix(const Matrix<float, 4,4, Flags_, 4,4>&  mat) { glLoadMatrixf(mat.data()); }
+    static void loadMatrix(const Matrix<double,4,4, Flags_, 4,4>& mat) { glLoadMatrixd(mat.data()); }
+    static void multMatrix(const Matrix<float, 4,4, Flags_, 4,4>&  mat) { glMultMatrixf(mat.data()); }
+    static void multMatrix(const Matrix<double,4,4, Flags_, 4,4>& mat) { glMultMatrixd(mat.data()); }
 };
 
-template<int _Flags> struct GlMatrixHelper<true,_Flags>
+template<int Flags_> struct GlMatrixHelper<true,Flags_>
 {
-    static void loadMatrix(const Matrix<float, 4,4, _Flags, 4,4>&  mat) { glLoadMatrixf(mat.transpose().eval().data()); }
-    static void loadMatrix(const Matrix<double,4,4, _Flags, 4,4>& mat) { glLoadMatrixd(mat.transpose().eval().data()); }
-    static void multMatrix(const Matrix<float, 4,4, _Flags, 4,4>&  mat) { glMultMatrixf(mat.transpose().eval().data()); }
-    static void multMatrix(const Matrix<double,4,4, _Flags, 4,4>& mat) { glMultMatrixd(mat.transpose().eval().data()); }
+    static void loadMatrix(const Matrix<float, 4,4, Flags_, 4,4>&  mat) { glLoadMatrixf(mat.transpose().eval().data()); }
+    static void loadMatrix(const Matrix<double,4,4, Flags_, 4,4>& mat) { glLoadMatrixd(mat.transpose().eval().data()); }
+    static void multMatrix(const Matrix<float, 4,4, Flags_, 4,4>&  mat) { glMultMatrixf(mat.transpose().eval().data()); }
+    static void multMatrix(const Matrix<double,4,4, Flags_, 4,4>& mat) { glMultMatrixd(mat.transpose().eval().data()); }
 };
 
 inline void GpuHelper::setMatrixTarget(GLenum matrixTarget)
@@ -137,11 +137,11 @@
     glMatrixMode(mCurrentMatrixTarget=matrixTarget);
 }
 
-template<typename Scalar, int _Flags>
-void GpuHelper::multMatrix(const Matrix<Scalar,4,4, _Flags, 4,4>& mat, GLenum matrixTarget)
+template<typename Scalar, int Flags_>
+void GpuHelper::multMatrix(const Matrix<Scalar,4,4, Flags_, 4,4>& mat, GLenum matrixTarget)
 {
   setMatrixTarget(matrixTarget);
-  GlMatrixHelper<_Flags&Eigen::RowMajorBit, _Flags>::multMatrix(mat);
+  GlMatrixHelper<Flags_&Eigen::RowMajorBit, Flags_>::multMatrix(mat);
 }
 
 template<typename Scalar, typename Derived>
@@ -153,11 +153,11 @@ void GpuHelper::loadMatrix(
     glLoadIdentity();
 }
 
-template<typename Scalar, int _Flags>
-void GpuHelper::loadMatrix(const Eigen::Matrix<Scalar,4,4, _Flags, 4,4>& mat, GLenum matrixTarget)
+template<typename Scalar, int Flags_>
+void GpuHelper::loadMatrix(const Eigen::Matrix<Scalar,4,4, Flags_, 4,4>& mat, GLenum matrixTarget)
 {
   setMatrixTarget(matrixTarget);
-  GlMatrixHelper<(_Flags&Eigen::RowMajorBit)!=0, _Flags>::loadMatrix(mat);
+  GlMatrixHelper<(Flags_&Eigen::RowMajorBit)!=0, Flags_>::loadMatrix(mat);
 }
 
 inline void GpuHelper::pushMatrix(GLenum matrixTarget)
@@ -166,11 +166,11 @@
     glPushMatrix();
 }
 
-template<typename Scalar, int _Flags>
-inline void GpuHelper::pushMatrix(const Matrix<Scalar,4,4, _Flags, 4,4>& mat, GLenum matrixTarget)
+template<typename Scalar, int Flags_>
+inline void GpuHelper::pushMatrix(const Matrix<Scalar,4,4, Flags_, 4,4>& mat, GLenum matrixTarget)
 {
     pushMatrix(matrixTarget);
-    GlMatrixHelper<_Flags&Eigen::RowMajorBit,_Flags>::loadMatrix(mat);
+    GlMatrixHelper<Flags_&Eigen::RowMajorBit,Flags_>::loadMatrix(mat);
 }
 
 template<typename Scalar, typename Derived>
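The storage-order dispatch above exists because fixed-function OpenGL consumes column-major 4x4 arrays: a column-major Eigen matrix can be handed over through data() unchanged, while a row-major one is first transposed into a temporary. A sketch of the two paths, assuming the demo's GL headers and its global `gpu` helper object:

    #include <Eigen/Core>

    void upload_modelview() {
      Eigen::Matrix<float,4,4,Eigen::ColMajor> mc = Eigen::Matrix4f::Identity();
      Eigen::Matrix<float,4,4,Eigen::RowMajor> mr = mc;

      gpu.loadMatrix(mc, GL_MODELVIEW); // resolves to glLoadMatrixf(mc.data())
      gpu.loadMatrix(mr, GL_MODELVIEW); // resolves to glLoadMatrixf(mr.transpose().eval().data())
    }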
Quaternionf(lerp(alpha,OrientationType(a.orientation),OrientationType(b.orientation)))); } -template class EulerAngles +template class EulerAngles { public: enum { Dim = 3 }; - typedef _Scalar Scalar; + typedef Scalar_ Scalar; typedef Matrix Matrix3; typedef Matrix Vector3; typedef Quaternion QuaternionType; diff --git a/libs/eigen/doc/CMakeLists.txt b/libs/eigen/doc/CMakeLists.txt index 0f9ef23..e7eaa4b 100644 --- a/libs/eigen/doc/CMakeLists.txt +++ b/libs/eigen/doc/CMakeLists.txt @@ -10,9 +10,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) endif() endif() -# some examples and snippets needs c++11, so let's check it once -check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) - option(EIGEN_INTERNAL_DOCUMENTATION "Build internal documentation" OFF) option(EIGEN_DOC_USE_MATHJAX "Use MathJax for rendering math in HTML docs" ON) diff --git a/libs/eigen/doc/Doxyfile.in b/libs/eigen/doc/Doxyfile.in index bc1e03c..d0e96fa 100644 --- a/libs/eigen/doc/Doxyfile.in +++ b/libs/eigen/doc/Doxyfile.in @@ -1600,8 +1600,6 @@ PREDEFINED = EIGEN_EMPTY_STRUCT \ EIGEN_QT_SUPPORT \ EIGEN_STRONG_INLINE=inline \ EIGEN_DEVICE_FUNC= \ - EIGEN_HAS_CXX11=1 \ - EIGEN_HAS_CXX11_MATH=1 \ "EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR)=template const CwiseBinaryOp, const Derived, const OtherDerived> METHOD(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const;" \ "EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS)=CwiseBinaryOp, const LHS, const RHS>"\ "EIGEN_CAT2(a,b)= a ## b"\ diff --git a/libs/eigen/doc/InsideEigenExample.dox b/libs/eigen/doc/InsideEigenExample.dox index ea2275b..4af185d 100644 --- a/libs/eigen/doc/InsideEigenExample.dox +++ b/libs/eigen/doc/InsideEigenExample.dox @@ -88,7 +88,7 @@ You may wonder, isn't it overengineering to have the storage in a separate class Let's look at this constructor, in src/Core/DenseStorage.h. You can see that there are many partial template specializations of DenseStorages here, treating separately the cases where dimensions are Dynamic or fixed at compile-time. The partial specialization that we are looking at is: \code -template class DenseStorage +template class DenseStorage \endcode Here, the constructor called is DenseStorage::DenseStorage(int size, int rows, int columns) @@ -101,7 +101,7 @@ inline DenseStorage(int size, int rows, int) : m_data(internal::aligned_new(s Here, the \a m_data member is the actual array of coefficients of the matrix. As you see, it is dynamically allocated. Rather than calling new[] or malloc(), as you can see, we have our own internal::aligned_new defined in src/Core/util/Memory.h. What it does is that if vectorization is enabled, then it uses a platform-specific call to allocate a 128-bit-aligned array, as that is very useful for vectorization with both SSE2 and AltiVec. If vectorization is disabled, it amounts to the standard new[]. -As you can see, the constructor also sets the \a m_rows member to \a size. Notice that there is no \a m_columns member: indeed, in this partial specialization of DenseStorage, we know the number of columns at compile-time, since the _Cols template parameter is different from Dynamic. Namely, in our case, _Cols is 1, which is to say that our vector is just a matrix with 1 column. Hence, there is no need to store the number of columns as a runtime variable. +As you can see, the constructor also sets the \a m_rows member to \a size. 
Notice that there is no \a m_columns member: indeed, in this partial specialization of DenseStorage, we know the number of columns at compile-time, since the Cols_ template parameter is different from Dynamic. Namely, in our case, Cols_ is 1, which is to say that our vector is just a matrix with 1 column. Hence, there is no need to store the number of columns as a runtime variable. When you call VectorXf::data() to get the pointer to the array of coefficients, it returns DenseStorage::data() which returns the \a m_data member. diff --git a/libs/eigen/doc/PreprocessorDirectives.dox b/libs/eigen/doc/PreprocessorDirectives.dox index 0f545b0..d6024dc 100644 --- a/libs/eigen/doc/PreprocessorDirectives.dox +++ b/libs/eigen/doc/PreprocessorDirectives.dox @@ -55,29 +55,15 @@ By default, %Eigen strive to automatically detect and enable language features a the information provided by the compiler. - \b EIGEN_MAX_CPP_VER - disables usage of C++ features requiring a version greater than EIGEN_MAX_CPP_VER. - Possible values are: 03, 11, 14, 17, etc. If not defined (the default), %Eigen enables all features supported + Possible values are: 14, 17, etc. If not defined (the default), %Eigen enables all features supported by the compiler. Individual features can be explicitly enabled or disabled by defining the following token to 0 or 1 respectively. -For instance, one might limit the C++ version to C++03 by defining EIGEN_MAX_CPP_VER=03, but still enable C99 math +For instance, one might limit the C++ version to C++14 by defining EIGEN_MAX_CPP_VER=14, but still enable C99 math functions by defining EIGEN_HAS_C99_MATH=1. - \b EIGEN_HAS_C99_MATH - controls the usage of C99 math functions such as erf, erfc, lgamma, etc. - Automatic detection disabled if EIGEN_MAX_CPP_VER<11. - - \b EIGEN_HAS_CXX11_MATH - controls the implementation of some functions such as round, logp1, isinf, isnan, etc. - Automatic detection disabled if EIGEN_MAX_CPP_VER<11. - - \b EIGEN_HAS_RVALUE_REFERENCES - defines whether rvalue references are supported - Automatic detection disabled if EIGEN_MAX_CPP_VER<11. - \b EIGEN_HAS_STD_RESULT_OF - defines whether std::result_of is supported - Automatic detection disabled if EIGEN_MAX_CPP_VER<11. - - \b EIGEN_HAS_VARIADIC_TEMPLATES - defines whether variadic templates are supported - Automatic detection disabled if EIGEN_MAX_CPP_VER<11. - - \b EIGEN_HAS_CONSTEXPR - defines whether relaxed const expression are supported - Automatic detection disabled if EIGEN_MAX_CPP_VER<14. - - \b EIGEN_HAS_CXX11_CONTAINERS - defines whether STL's containers follows C++11 specifications - Automatic detection disabled if EIGEN_MAX_CPP_VER<11. - - \b EIGEN_HAS_CXX11_NOEXCEPT - defines whether noexcept is supported - Automatic detection disabled if EIGEN_MAX_CPP_VER<11. - \b EIGEN_NO_IO - Disables any usage and support for ``. \section TopicPreprocessorDirectivesAssertions Assertions @@ -104,7 +90,7 @@ run time. However, these assertions do cost time and can thus be turned off. - \b \c EIGEN_MAX_ALIGN_BYTES - Must be a power of two, or 0. Defines an upper bound on the memory boundary in bytes on which dynamically and statically allocated data may be aligned by %Eigen. If not defined, a default value is automatically computed based on architecture, compiler, and OS. This option is typically used to enforce binary compatibility between code/libraries compiled with different SIMD options. 
For instance, one may compile AVX code and enforce ABI compatibility with existing SSE code by defining \c EIGEN_MAX_ALIGN_BYTES=16. In the other way round, since by default AVX implies 32 bytes alignment for best performance, one can compile SSE code to be ABI compatible with AVX code by defining \c EIGEN_MAX_ALIGN_BYTES=32. - \b \c EIGEN_MAX_STATIC_ALIGN_BYTES - Same as \c EIGEN_MAX_ALIGN_BYTES but for statically allocated data only. By default, if only \c EIGEN_MAX_ALIGN_BYTES is defined, then \c EIGEN_MAX_STATIC_ALIGN_BYTES == \c EIGEN_MAX_ALIGN_BYTES, otherwise a default value is automatically computed based on architecture, compiler, and OS (can be smaller than the default value of EIGEN_MAX_ALIGN_BYTES on architectures that do not support stack alignment). - Let us emphasize that \c EIGEN_MAX_*_ALIGN_BYTES define only a diserable upper bound. In practice data is aligned to largest power-of-two common divisor of \c EIGEN_MAX_STATIC_ALIGN_BYTES and the size of the data, such that memory is not wasted. + Let us emphasize that \c EIGEN_MAX_*_ALIGN_BYTES define only a desirable upper bound. In practice data is aligned to largest power-of-two common divisor of \c EIGEN_MAX_STATIC_ALIGN_BYTES and the size of the data, such that memory is not wasted. - \b \c EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP. See \ref TopicMultiThreading for details. - \b \c EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless @@ -131,8 +117,11 @@ run time. However, these assertions do cost time and can thus be turned off. - \b \c EIGEN_DEFAULT_L2_CACHE_SIZE - Sets the default L2 cache size that is used in Eigen's GEBP kernel when the correct cache size cannot be determined at runtime. - \b \c EIGEN_DEFAULT_L3_CACHE_SIZE - Sets the default L3 cache size that is used in Eigen's GEBP kernel when the correct cache size cannot be determined at runtime. - - \c EIGEN_DONT_ALIGN - Deprecated, it is a synonym for \c EIGEN_MAX_ALIGN_BYTES=0. It disables alignment completely. %Eigen will not try to align its objects and does not expect that any objects passed to it are aligned. This will turn off vectorization if \b \c EIGEN_UNALIGNED_VECTORIZE=1. Not defined by default. - - \c EIGEN_DONT_ALIGN_STATICALLY - Deprecated, it is a synonym for \c EIGEN_MAX_STATIC_ALIGN_BYTES=0. It disables alignment of arrays on the stack. Not defined by default, unless \c EIGEN_DONT_ALIGN is defined. + - \b \c EIGEN_DONT_ALIGN - Deprecated, it is a synonym for \c EIGEN_MAX_ALIGN_BYTES=0. It disables alignment completely. %Eigen will not try to align its objects and does not expect that any objects passed to it are aligned. This will turn off vectorization if \b \c EIGEN_UNALIGNED_VECTORIZE=1. Not defined by default. + - \b \c EIGEN_DONT_ALIGN_STATICALLY - Deprecated, it is a synonym for \c EIGEN_MAX_STATIC_ALIGN_BYTES=0. It disables alignment of arrays on the stack. Not defined by default, unless \c EIGEN_DONT_ALIGN is defined. + - \b \c EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH - Controls whether to use Eigen's dynamic dispatching for Altivec MMA or not. + - \b \c EIGEN_ALTIVEC_DISABLE_MMA - Overrides the usage of Altivec MMA instructions. + - \b \c EIGEN_ALTIVEC_USE_CUSTOM_PACK - Controls whether to use Eigen's custom packing for Altivec or not. 
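All of the tokens above are plain preprocessor defines, and they must be visible before the first %Eigen header is included. A minimal sketch of a configuration translation unit (the values chosen here are purely illustrative, not recommendations):
\code
// Illustrative only: cap alignment (e.g. for ABI compatibility with SSE code)
// and opt out of OpenMP parallelization. Must appear before any Eigen include.
#define EIGEN_MAX_ALIGN_BYTES 16
#define EIGEN_DONT_PARALLELIZE
#include <Eigen/Dense>

int main() {
  Eigen::Matrix4d m = Eigen::Matrix4d::Identity();
  return m.trace() == 4.0 ? 0 : 1;
}
\endcode
Passing the same tokens on the compiler command line (e.g. \c -DEIGEN_DONT_PARALLELIZE) is equivalent and avoids include-order mistakes.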
\section TopicPreprocessorDirectivesPlugins Plugins diff --git a/libs/eigen/doc/QuickReference.dox b/libs/eigen/doc/QuickReference.dox index c5dfce4..e96b617 100644 --- a/libs/eigen/doc/QuickReference.dox +++ b/libs/eigen/doc/QuickReference.dox @@ -367,7 +367,8 @@ vec2 = vec1.normalized(); vec1.normalize(); // inplace \endcode \link MatrixBase::cross() cross product \endlink \matrixworld\code #include -vec3 = vec1.cross(vec2);\endcode +v3c = v3a.cross(v3b); // size-3 vectors +scalar = v2a.cross(v2b); // size-2 vectors \endcode top diff --git a/libs/eigen/doc/QuickStartGuide.dox b/libs/eigen/doc/QuickStartGuide.dox index 4192b28..0372694 100644 --- a/libs/eigen/doc/QuickStartGuide.dox +++ b/libs/eigen/doc/QuickStartGuide.dox @@ -22,11 +22,11 @@ We will explain the program after telling you how to compile it. \section GettingStartedCompiling Compiling and running your first program -There is no library to link to. The only thing that you need to keep in mind when compiling the above program is that the compiler must be able to find the Eigen header files. The directory in which you placed Eigen's source code must be in the include path. With GCC you use the -I option to achieve this, so you can compile the program with a command like this: +There is no library to link to. The only thing that you need to keep in mind when compiling the above program is that the compiler must be able to find the Eigen header files. The directory in which you placed Eigen's source code must be in the include path. With GCC you use the \c -I option to achieve this, so you can compile the program with a command like this: \code g++ -I /path/to/eigen/ my_program.cpp -o my_program \endcode -On Linux or Mac OS X, another option is to symlink or copy the Eigen folder into /usr/local/include/. This way, you can compile the program with: +On Linux or Mac OS X, another option is to symlink or copy the Eigen folder into \c /usr/local/include/. This way, you can compile the program with: \code g++ my_program.cpp -o my_program \endcode diff --git a/libs/eigen/doc/SparseLinearSystems.dox b/libs/eigen/doc/SparseLinearSystems.dox index 38754e4..0135ee2 100644 --- a/libs/eigen/doc/SparseLinearSystems.dox +++ b/libs/eigen/doc/SparseLinearSystems.dox @@ -13,24 +13,20 @@ They are summarized in the following tables: - + - - - -
ClassSolver kindMatrix kindFeatures related to performanceLicense

Notes

Notes

SimplicialLLT \n \#includeDirect LLt factorizationSPDFill-in reducingLGPL SimplicialLDLT is often preferable
SimplicialLDLT \n \#includeDirect LDLt factorizationSPDFill-in reducingLGPL Recommended for very sparse and not too large problems (e.g., 2D Poisson eq.)
SparseLU \n \#include LU factorization Square Fill-in reducing, Leverage fast dense algebraMPL2 optimized for small and large problems with irregular patterns
SparseQR \n \#include QR factorization Any, rectangular Fill-in reducingMPL2 recommended for least-square problems, has a basic rank-revealing feature
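As a rough usage sketch for the direct solvers listed above (the helper function, its name, and the error handling are illustrative, not part of %Eigen's API):
\code
#include <Eigen/Sparse>

// Sketch: solve A x = b with SparseLU. A and b are assumed to be
// assembled elsewhere; an empty vector signals a failed factorization.
Eigen::VectorXd solveWithSparseLU(const Eigen::SparseMatrix<double>& A,
                                  const Eigen::VectorXd& b) {
  Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int>> solver;
  solver.analyzePattern(A);   // fill-in reducing reordering, structure only
  solver.factorize(A);        // numerical factorization
  if (solver.info() != Eigen::Success)
    return Eigen::VectorXd();
  return solver.solve(b);
}
\endcode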
@@ -38,21 +34,18 @@ They are summarized in the following tables: - + - - - + -
ClassSolver kindMatrix kindSupported preconditioners, [default]License

Notes

Notes

ConjugateGradient \n \#include Classic iterative CGSPD IdentityPreconditioner, [DiagonalPreconditioner], IncompleteCholeskyMPL2 Recommended for large symmetric problems (e.g., 3D Poisson eq.)
LeastSquaresConjugateGradient \n \#includeCG for rectangular least-square problemRectangular IdentityPreconditioner, [LeastSquareDiagonalPreconditioner]MPL2Solve for min |A'Ax-b|^2 without forming A'A
Solve for min |Ax-b|^2 without forming A'A
BiCGSTAB \n \#includeIterative stabilized bi-conjugate gradientSquare IdentityPreconditioner, [DiagonalPreconditioner], IncompleteLUTMPL2 To speed up the convergence, try it with the \ref IncompleteLUT preconditioner.
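A corresponding sketch for the iterative solvers above (the tolerance and iteration cap are arbitrary illustration values):
\code
#include <Eigen/Sparse>
#include <iostream>

// Sketch: solve an SPD system with ConjugateGradient and its default
// DiagonalPreconditioner. A and b are assumed to be assembled elsewhere.
Eigen::VectorXd solveWithCG(const Eigen::SparseMatrix<double>& A,
                            const Eigen::VectorXd& b) {
  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower | Eigen::Upper> cg;
  cg.setTolerance(1e-8);      // desired relative accuracy
  cg.setMaxIterations(1000);
  cg.compute(A);              // for iterative solvers: sets up the preconditioner
  Eigen::VectorXd x = cg.solve(b);
  std::cout << "#iterations: " << cg.iterations()
            << ", estimated error: " << cg.error() << std::endl;
  return x;
}
\endcode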
@@ -82,6 +75,9 @@ They are summarized in the following tables: PardisoLLT \n PardisoLDLT \n PardisoLU\link PardisoSupport_Module PardisoSupport \endlinkDirect LLt, LDLt, LU factorizationsSPD \n SPD \n SquareFill-in reducing, Leverage fast dense algebra, Multithreading Requires the Intel MKL package, \b Proprietary optimized for tough problem patterns, see also \link TopicUsingIntelMKL using MKL with Eigen \endlink +AccelerateLLT \n AccelerateLDLT \n AccelerateQR\link AccelerateSupport_Module AccelerateSupport \endlinkDirect LLt, LDLt, QR factorizationsSPD \n SPD \n RectangularFill-in reducing, Leverage fast dense algebra, Multithreading + Requires the Apple Accelerate package, \b Proprietary + + Here \c SPD means symmetric positive definite. @@ -137,7 +133,7 @@ x1 = solver.solve(b1); x2 = solver.solve(b2); ... \endcode -The compute() method is equivalent to calling both analyzePattern() and factorize(). +The `compute()` method is equivalent to calling both `analyzePattern()` and `factorize()`. Each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on. More details are available in the documentation of the respective classes. @@ -145,9 +141,9 @@ More details are available in the documentation of the respective classes. Finally, most of the iterative solvers can also be used in a \b matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. \section TheSparseCompute The Compute Step -In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize(). +In the `compute()` function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into `analyzePattern()` and `factorize()`. -The goal of analyzePattern() is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step should not be used with other matrices. +The goal of `analyzePattern()` is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step should not be used with other matrices. Eigen provides a limited set of methods to reorder the matrix in this step, either built-in (COLAMD, AMD) or external (METIS).
These methods are set in the template parameter list of the solver: \code @@ -156,21 +152,21 @@ DirectSolverClassName<SparseMatrix<double>, OrderingMethod > solver; \endcode See the \link OrderingMethods_Module OrderingMethods module \endlink for the list of available methods and the associated options. -In factorize(), the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls. +In `factorize()`, the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls. For iterative solvers, the compute step is used to eventually set up a preconditioner. For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step. Remember that, basically, the goal of the preconditioner is to speed up the convergence of an iterative method by solving a modified linear system where the coefficient matrix has more clustered eigenvalues. For real problems, an iterative solver should always be used with a preconditioner. In Eigen, a preconditioner is selected by simply adding it as a template parameter to the iterative solver object. \code IterativeSolverClassName<SparseMatrix<double>, PreconditionerName > solver; \endcode -The member function preconditioner() returns a read-write reference to the preconditioner +The member function `preconditioner()` returns a read-write reference to the preconditioner to directly interact with it. See the \link IterativeLinearSolvers_Module Iterative solvers module \endlink and the documentation of each class for the list of available methods. \section TheSparseSolve The Solve step -The solve() function computes the solution of the linear systems with one or many right hand sides. +The `solve()` function computes the solution of the linear systems with one or many right hand sides. \code X = solver.solve(B); \endcode -Here, B can be a vector or a matrix where the columns form the different right hand sides. The solve() function can be called several times as well, for instance when all the right hand sides are not available at once. +Here, B can be a vector or a matrix where the columns form the different right hand sides. The `solve()` function can be called several times as well, for instance when all the right hand sides are not available at once. \code x1 = solver.solve(b1); // Get the second right hand side b2 @@ -180,7 +176,7 @@ x2 = solver.solve(b2); For direct methods, the solutions are computed at machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using \b setTolerance(). For all the available functions, please refer to the documentation of the \link IterativeLinearSolvers_Module Iterative solvers module \endlink. \section BenchmarkRoutine -Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to bench/spbench and compile the routine by typing \b make \e spbenchsolver. Run it with --help option to get the list of all available options.
Basically, the matrices to test should be in MatrixMarket Coordinate format, and the routine returns the statistics from all available solvers in Eigen. +Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to `bench/spbench` and compile the routine by typing `make spbenchsolver`. Run it with the `--help` option to get the list of all available options. Basically, the matrices to test should be in MatrixMarket Coordinate format, and the routine returns the statistics from all available solvers in Eigen. To export your matrices and right-hand-side vectors in the matrix-market format, you can use the unsupported SparseExtra module: \code diff --git a/libs/eigen/doc/SparseQuickReference.dox b/libs/eigen/doc/SparseQuickReference.dox index 9779f3f..b8264a4 100644 --- a/libs/eigen/doc/SparseQuickReference.dox +++ b/libs/eigen/doc/SparseQuickReference.dox @@ -249,7 +249,7 @@ sm1.outerIndexPtr(); // Pointer to the beginning of each inner vector \endcode -If the matrix is not in compressed form, makeCompressed() should be called before.\n +If the matrix is not in compressed form, `makeCompressed()` should be called before.\n Note that these functions are mostly provided for interoperability purposes with external libraries.\n A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section diff --git a/libs/eigen/doc/StructHavingEigenMembers.dox b/libs/eigen/doc/StructHavingEigenMembers.dox index 87016cd..4c97093 100644 --- a/libs/eigen/doc/StructHavingEigenMembers.dox +++ b/libs/eigen/doc/StructHavingEigenMembers.dox @@ -80,7 +80,7 @@ But AVX instructions (at least the ones that %Eigen uses, which are the fast one) Otherwise you get a segmentation fault. For this reason, %Eigen takes care by itself to require 256-bit alignment for Eigen::Vector4d, by doing two things: -\li %Eigen requires 256-bit alignment for the Eigen::Vector4d's array (of 4 doubles). With \cpp11 this is done with the alignas keyword, or compiler's extensions for c++98/03. +\li %Eigen requires 256-bit alignment for the Eigen::Vector4d's array (of 4 doubles). This is done with the alignas keyword. \li %Eigen overloads the `operator new` of Eigen::Vector4d so it will always return 256-bit aligned pointers. (removed in \cpp17) Thus, normally, you don't have to worry about anything, %Eigen handles alignment of operator new for you... diff --git a/libs/eigen/doc/TopicLinearAlgebraDecompositions.dox b/libs/eigen/doc/TopicLinearAlgebraDecompositions.dox index 402b376..8598ce6 100644 --- a/libs/eigen/doc/TopicLinearAlgebraDecompositions.dox +++ b/libs/eigen/doc/TopicLinearAlgebraDecompositions.dox @@ -272,7 +272,7 @@ To get an overview of the true relative speed of the different decompositions, c
Blocking
Means the algorithm can work per block, hence guaranteeing a good scaling of the performance for large matrices.
Implicit Multi Threading (MT)
-
Means the algorithm can take advantage of multicore processors via OpenMP. "Implicit" means the algortihm itself is not parallelized, but that it relies on parallelized matrix-matrix product routines.
+
Means the algorithm can take advantage of multicore processors via OpenMP. "Implicit" means the algorithm itself is not parallelized, but that it relies on parallelized matrix-matrix product routines.
Explicit Multi Threading (MT)
Means the algorithm is explicitly parallelized to take advantage of multicore processors via OpenMP.
Meta-unroller
diff --git a/libs/eigen/doc/TutorialMatrixArithmetic.dox b/libs/eigen/doc/TutorialMatrixArithmetic.dox index 5fc569a..53916c2 100644 --- a/libs/eigen/doc/TutorialMatrixArithmetic.dox +++ b/libs/eigen/doc/TutorialMatrixArithmetic.dox @@ -158,7 +158,7 @@ For dot product and cross product, you need the \link MatrixBase::dot() dot()\en \verbinclude tut_arithmetic_dot_cross.out -Remember that cross product is only for vectors of size 3. Dot product is for vectors of any sizes. +Cross product is defined in Eigen not only for vectors of size 3 but also for those of size 2, check \link MatrixBase::cross() the doc\endlink for details. Dot product is for vectors of any sizes. When using complex numbers, Eigen's dot product is conjugate-linear in the first variable and linear in the second variable. diff --git a/libs/eigen/doc/TutorialMatrixClass.dox b/libs/eigen/doc/TutorialMatrixClass.dox index 2c45222..e4e4f98 100644 --- a/libs/eigen/doc/TutorialMatrixClass.dox +++ b/libs/eigen/doc/TutorialMatrixClass.dox @@ -111,9 +111,9 @@ Vector4d c(5.0, 6.0, 7.0, 8.0); If C++11 is enabled, fixed-size column or row vectors of arbitrary size can be initialized by passing an arbitrary number of coefficients: \code -Vector2i a(1, 2); // A column vector containing the elements {1, 2} -Matrix b {1, 2, 3, 4, 5}; // A row-vector containing the elements {1, 2, 3, 4, 5} -Matrix c = {1, 2, 3, 4, 5}; // A column vector containing the elements {1, 2, 3, 4, 5} +Vector2i a(1, 2); // A column-vector containing the elements {1, 2} +Matrix b {1, 2, 3, 4, 5}; // A column-vector containing the elements {1, 2, 3, 4, 5} +Matrix c = {1, 2, 3, 4, 5}; // A row-vector containing the elements {1, 2, 3, 4, 5} \endcode In the general case of matrices and vectors with either fixed or runtime sizes, @@ -151,14 +151,14 @@ The numbering starts at 0. This example is self-explanatory: \verbinclude tut_matrix_coefficient_accessors.out -Note that the syntax m(index) +Note that the syntax `m(index)` is not restricted to vectors, it is also available for general matrices, meaning index-based access in the array of coefficients. This however depends on the matrix's storage order. All Eigen matrices default to column-major storage order, but this can be changed to row-major, see \ref TopicStorageOrders "Storage orders". -The operator[] is also overloaded for index-based access in vectors, but keep in mind that C++ doesn't allow operator[] to -take more than one argument. We restrict operator[] to vectors, because an awkwardness in the C++ language -would make matrix[i,j] compile to the same thing as matrix[j] ! +The `operator[]` is also overloaded for index-based access in vectors, but keep in mind that C++ doesn't allow `operator[]` to +take more than one argument. We restrict `operator[]` to vectors, because an awkwardness in the C++ language +would make `matrix[i,j]` compile to the same thing as `matrix[j]`! \section TutorialMatrixCommaInitializer Comma-initialization @@ -186,8 +186,8 @@ The current size of a matrix can be retrieved by \link EigenBase::rows() rows()\ \verbinclude tut_matrix_resize.out -The resize() method is a no-operation if the actual matrix size doesn't change; otherwise it is destructive: the values of the coefficients may change. -If you want a conservative variant of resize() which does not change the coefficients, use \link PlainObjectBase::conservativeResize() conservativeResize()\endlink, see \ref TopicResizing "this page" for more details. 
+The `resize()` method is a no-operation if the actual matrix size doesn't change; otherwise it is destructive: the values of the coefficients may change. +If you want a conservative variant of `resize()` which does not change the coefficients, use \link PlainObjectBase::conservativeResize() conservativeResize()\endlink, see \ref TopicResizing "this page" for more details. All these methods are still available on fixed-size matrices, for the sake of API uniformity. Of course, you can't actually resize a fixed-size matrix. Trying to change a fixed size to an actually different value will trigger an assertion failure; @@ -234,7 +234,7 @@ is always allocated on the heap, so doing \code MatrixXf mymatrix(rows,columns); \endcode amounts to doing \code float *mymatrix = new float[rows*columns]; \endcode -and in addition to that, the MatrixXf object stores its number of rows and columns as +and in addition to that, the \c MatrixXf object stores its number of rows and columns as member variables. The limitation of using fixed sizes, of course, is that this is only possible @@ -276,14 +276,16 @@ Matrix. For example, MatrixXi for Matrix. -\li VectorNt for Matrix. For example, Vector2f for Matrix. -\li RowVectorNt for Matrix. For example, RowVector3d for Matrix. +\li \c MatrixNt for `Matrix`. For example, \c MatrixXi for `Matrix`. +\li \c MatrixXNt for `Matrix`. For example, \c MatrixX3i for `Matrix`. +\li \c MatrixNXt for `Matrix`. For example, \c Matrix4Xd for `Matrix`. +\li \c VectorNt for `Matrix`. For example, \c Vector2f for `Matrix`. +\li \c RowVectorNt for `Matrix`. For example, \c RowVector3d for `Matrix`. Where: -\li N can be any one of \c 2, \c 3, \c 4, or \c X (meaning \c Dynamic). -\li t can be any one of \c i (meaning int), \c f (meaning float), \c d (meaning double), - \c cf (meaning complex), or \c cd (meaning complex). The fact that typedefs are only +\li \c N can be any one of \c 2, \c 3, \c 4, or \c X (meaning \c Dynamic). +\li \c t can be any one of \c i (meaning \c int), \c f (meaning \c float), \c d (meaning \c double), + \c cf (meaning `complex`), or \c cd (meaning `complex`). The fact that `typedef`s are only defined for these five types doesn't mean that they are the only supported scalar types. For example, all standard integer types are supported, see \ref TopicScalarTypes "Scalar types". diff --git a/libs/eigen/doc/TutorialReshape.dox b/libs/eigen/doc/TutorialReshape.dox index 5b4022a..07e5c3c 100644 --- a/libs/eigen/doc/TutorialReshape.dox +++ b/libs/eigen/doc/TutorialReshape.dox @@ -3,7 +3,7 @@ namespace Eigen { /** \eigenManualPage TutorialReshape Reshape Since the version 3.4, %Eigen exposes convenient methods to reshape a matrix to another matrix of different sizes or vector. -All cases are handled via the DenseBase::reshaped(NRowsType,NColsType) and DenseBase::reshaped() functions. +All cases are handled via the `DenseBase::reshaped(NRowsType,NColsType)` and `DenseBase::reshaped()` functions. Those functions do not perform in-place reshaping, but instead return a view on the input expression. \eigenAutoToc @@ -23,7 +23,7 @@ Here is an example reshaping a 4x4 matrix to a 2x8 one: By default, the input coefficients are always interpreted in column-major order regardless of the storage order of the input expression. -For more control on ordering, compile-time sizes, and automatic size deduction, please see de documentation of DenseBase::reshaped(NRowsType,NColsType) that contains all the details with many examples. 
+For more control on ordering, compile-time sizes, and automatic size deduction, please see the documentation of `DenseBase::reshaped(NRowsType,NColsType)` that contains all the details with many examples. \section TutorialReshapeMat2Vec 1D linear views diff --git a/libs/eigen/doc/TutorialSlicingIndexing.dox b/libs/eigen/doc/TutorialSlicingIndexing.dox index 98ace43..8b067df 100644 --- a/libs/eigen/doc/TutorialSlicingIndexing.dox +++ b/libs/eigen/doc/TutorialSlicingIndexing.dox @@ -15,7 +15,7 @@ All the aforementioned operations are handled through the generic DenseBase::ope Each argument can be: - An integer indexing a single row or column, including symbolic indices. - The symbol Eigen::all representing the whole set of respective rows or columns in increasing order. - - An ArithmeticSequence as constructed by the Eigen::seq, Eigen::seqN, or Eigen::lastN functions. + - An ArithmeticSequence as constructed by the Eigen::seq, Eigen::seqN, or Eigen::placeholders::lastN functions. - Any 1D vector/array of integers including %Eigen's vector/array, expressions, std::vector, std::array, as well as plain C arrays: `int[N]`. More generally, it can accept any object exposing the following two member functions: @@ -72,7 +72,7 @@ Here are some examples for a 2D array/matrix \c A and a 1D array/vector \c v. %Block starting at \c i,j having \c m rows, and \c n columns - \code A(seqN(i,m), seqN(i,n) \endcode + \code A(seqN(i,m), seqN(i,n)) \endcode \code A.block(i,j,m,n) \endcode @@ -112,9 +112,10 @@ Here are some examples for a 2D array/matrix \c A and a 1D array/vector \c v. -As seen in the last exemple, referencing the last n elements (or rows/columns) is a bit cumbersome to write. +As seen in the last example, referencing the last n elements (or rows/columns) is a bit cumbersome to write. This becomes even more tricky and error prone with a non-default increment. -Here comes \link Eigen::lastN(SizeType) Eigen::lastN(size) \endlink, and \link Eigen::lastN(SizeType,IncrType) Eigen::lastN(size,incr) \endlink: +Here comes \link Eigen::placeholders::lastN(SizeType) Eigen::placeholders::lastN(size) \endlink, and +\link Eigen::placeholders::lastN(SizeType,IncrType) Eigen::placeholders::lastN(size,incr) \endlink:
Bottom-right corner of A of size \c m times \c n\code v(lastN(m), lastN(n)) \endcode\code A(lastN(m), lastN(n)) \endcode \code A.bottomRightCorner(m,n) \endcode
Bottom-right corner of A of size \c m times \c n\code v(lastN(m), lastN(n)) \endcode\code A(lastN(m), lastN(n)) \endcode \code A.bottomRightCorner(m,n) \endcode
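Putting the slicing pieces above together, a small self-contained sketch (the placeholders namespace for lastN follows the convention used in this documentation; older %Eigen versions export it directly under Eigen::):
\code
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 6);
  using Eigen::seq;
  using Eigen::seqN;
  using Eigen::placeholders::lastN;

  Eigen::MatrixXd block    = A(seqN(1, 3), seqN(2, 2));    // same as A.block(1, 2, 3, 2)
  Eigen::MatrixXd corner   = A(lastN(2), lastN(2));        // same as A.bottomRightCorner(2, 2)
  Eigen::MatrixXd evenRows = A(seq(0, 5, 2), Eigen::all);  // rows 0, 2, 4
  return 0;
}
\endcode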
diff --git a/libs/eigen/doc/TutorialSparse.dox b/libs/eigen/doc/TutorialSparse.dox index c69171e..a00bacd 100644 --- a/libs/eigen/doc/TutorialSparse.dox +++ b/libs/eigen/doc/TutorialSparse.dox @@ -44,8 +44,8 @@ This storage scheme is better explained on an example. The following matrix and one of its possible sparse, \b column \b major representation:
Example:Output:
- - + +
Values: 227_3514__1_178
InnerIndices: 12_02 4__2_ 14
Values: 227_35_14_1_178
InnerIndices: 12_02_4_2_ 14
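The buffers shown above can be inspected directly through the raw-pointer accessors mentioned later in this patch (valuePtr(), innerIndexPtr(), outerIndexPtr()). A sketch printing the compressed form, in which the \c InnerNNZs array no longer exists:
\code
#include <Eigen/Sparse>
#include <iostream>

// Sketch: build a small column-major sparse matrix, compress it, and dump
// the Values / InnerIndices / OuterStarts arrays discussed above.
int main() {
  Eigen::SparseMatrix<double> mat(4, 4);   // column-major by default
  mat.insert(1, 0) = 22.0;
  mat.insert(3, 1) = 5.0;
  mat.insert(2, 3) = 14.0;
  mat.makeCompressed();                    // drop free space: plain CCS

  for (Eigen::Index k = 0; k < mat.nonZeros(); ++k)
    std::cout << "Values[" << k << "] = " << mat.valuePtr()[k]
              << ", InnerIndices[" << k << "] = " << mat.innerIndexPtr()[k] << "\n";
  for (Eigen::Index j = 0; j <= mat.outerSize(); ++j)
    std::cout << "OuterStarts[" << j << "] = " << mat.outerIndexPtr()[j] << "\n";
  return 0;
}
\endcode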
@@ -54,13 +54,13 @@ and one of its possible sparse, \b column \b major representation: Currently the elements of a given inner vector are guaranteed to be always sorted by increasing inner indices. The \c "_" indicates available free space to quickly insert new elements. -Assuming no reallocation is needed, the insertion of a random element is therefore in O(nnz_j) where nnz_j is the number of nonzeros of the respective inner vector. -On the other hand, inserting elements with increasing inner indices in a given inner vector is much more efficient since this only requires to increase the respective \c InnerNNZs entry that is a O(1) operation. +Assuming no reallocation is needed, the insertion of a random element is therefore in `O(nnz_j)` where `nnz_j` is the number of nonzeros of the respective inner vector. +On the other hand, inserting elements with increasing inner indices in a given inner vector is much more efficient since this only requires to increase the respective \c InnerNNZs entry that is a `O(1)` operation. The case where no empty space is available is a special case, and is referred to as the \em compressed mode. It corresponds to the widely used Compressed Column (or Row) Storage schemes (CCS or CRS). Any SparseMatrix can be turned to this form by calling the SparseMatrix::makeCompressed() function. -In this case, one can remark that the \c InnerNNZs array is redundant with \c OuterStarts because we have the equality: \c InnerNNZs[j] = \c OuterStarts[j+1]-\c OuterStarts[j]. +In this case, one can remark that the \c InnerNNZs array is redundant with \c OuterStarts because we have the equality: `InnerNNZs[j] == OuterStarts[j+1] - OuterStarts[j]`. Therefore, in practice a call to SparseMatrix::makeCompressed() frees this buffer. It is worth noting that most of our wrappers to external libraries require compressed matrices as inputs. @@ -221,9 +221,9 @@ A typical scenario of this approach is illustrated below: 5: mat.makeCompressed(); // optional \endcode -- The key ingredient here is the line 2 where we reserve room for 6 non-zeros per column. In many cases, the number of non-zeros per column or row can easily be known in advance. If it varies significantly for each inner vector, then it is possible to specify a reserve size for each inner vector by providing a vector object with an operator[](int j) returning the reserve size of the \c j-th inner vector (e.g., via a VectorXi or std::vector<int>). If only a rought estimate of the number of nonzeros per inner-vector can be obtained, it is highly recommended to overestimate it rather than the opposite. If this line is omitted, then the first insertion of a new element will reserve room for 2 elements per inner vector. +- The key ingredient here is line 2, where we reserve room for 6 non-zeros per column. In many cases, the number of non-zeros per column or row can easily be known in advance. If it varies significantly for each inner vector, then it is possible to specify a reserve size for each inner vector by providing a vector object with an `operator[](int j)` returning the reserve size of the \c j-th inner vector (e.g., via a `VectorXi` or `std::vector<int>`). If only a rough estimate of the number of nonzeros per inner-vector can be obtained, it is highly recommended to overestimate it rather than the opposite. If this line is omitted, then the first insertion of a new element will reserve room for 2 elements per inner vector. - The line 4 performs a sorted insertion.
In this example, the ideal case is when the \c j-th column is not full and contains non-zeros whose inner-indices are smaller than \c i. In this case, this operation boils down to a trivial O(1) operation. -- When calling insert(i,j) the element \c i \c ,j must not already exists, otherwise use the coeffRef(i,j) method that will allow to, e.g., accumulate values. This method first performs a binary search and finally calls insert(i,j) if the element does not already exist. It is more flexible than insert() but also more costly. +- When calling `insert(i,j)`, the element `i`, `j` must not already exist, otherwise use the `coeffRef(i,j)` method that will allow you to, e.g., accumulate values. This method first performs a binary search and finally calls `insert(i,j)` if the element does not already exist. It is more flexible than `insert()` but also more costly. - The line 5 suppresses the remaining empty space and transforms the matrix into a compressed column storage. @@ -259,7 +259,7 @@ sm2 = sm1.cwiseProduct(dm1); dm2 = sm1 + dm1; dm2 = dm1 - sm1; \endcode -Performance-wise, the adding/subtracting sparse and dense matrices is better performed in two steps. For instance, instead of doing dm2 = sm1 + dm1, better write: +Performance-wise, adding/subtracting sparse and dense matrices is better performed in two steps. For instance, instead of doing `dm2 = sm1 + dm1`, better write: \code dm2 = dm1; dm2 += sm1; @@ -272,7 +272,7 @@ This version has the advantage to fully exploit the higher performance of dense... sm1 = sm2.transpose(); sm1 = sm2.adjoint(); \endcode -However, there is no transposeInPlace() method. +However, there is no `transposeInPlace()` method. \subsection TutorialSparse_Products Matrix products @@ -284,18 +284,18 @@ dv2 = sm1 * dv1; dm2 = dm1 * sm1.adjoint(); dm2 = 2. * sm1 * dm1; \endcode - - \b symmetric \b sparse-dense. The product of a sparse symmetric matrix with a dense matrix (or vector) can also be optimized by specifying the symmetry with selfadjointView(): + - \b symmetric \b sparse-dense. The product of a sparse symmetric matrix with a dense matrix (or vector) can also be optimized by specifying the symmetry with `selfadjointView()`: \code -dm2 = sm1.selfadjointView<>() * dm1; // if all coefficients of A are stored -dm2 = A.selfadjointView<Upper>() * dm1; // if only the upper part of A is stored -dm2 = A.selfadjointView<Lower>() * dm1; // if only the lower part of A is stored +dm2 = sm1.selfadjointView<>() * dm1; // if all coefficients of sm1 are stored +dm2 = sm1.selfadjointView<Upper>() * dm1; // if only the upper part of sm1 is stored +dm2 = sm1.selfadjointView<Lower>() * dm1; // if only the lower part of sm1 is stored \endcode - \b sparse-sparse. For sparse-sparse products, two different algorithms are available. The default one is conservative and preserves the explicit zeros that might appear: \code sm3 = sm1 * sm2; sm3 = 4 * sm1.adjoint() * sm2; \endcode - The second algorithm prunes on the fly the explicit zeros, or the values smaller than a given threshold.
It is enabled and controlled through the `prune()` functions: \code sm3 = (sm1 * sm2).pruned(); // removes numerical zeros sm3 = (sm1 * sm2).pruned(ref); // removes elements much smaller than ref @@ -314,7 +314,7 @@ sm2 = sm1.transpose() * P; \subsection TutorialSparse_SubMatrices Block operations Regarding read-access, sparse matrices expose the same API than for dense matrices to access to sub-matrices such as blocks, columns, and rows. See \ref TutorialBlockOperations for a detailed introduction. -However, for performance reasons, writing to a sub-sparse-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as block(...) and corner*(...). The available API for write-access to a SparseMatrix are summarized below: +However, for performance reasons, writing to a sub-sparse-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as `block(...)` and `corner*(...)`. The available API for write-access to a SparseMatrix are summarized below: \code SparseMatrix sm1; sm1.col(j) = ...; @@ -329,22 +329,22 @@ sm2.middleRows(i,nrows) = ...; sm2.bottomRows(nrows) = ...; \endcode -In addition, sparse matrices expose the SparseMatrixBase::innerVector() and SparseMatrixBase::innerVectors() methods, which are aliases to the col/middleCols methods for a column-major storage, and to the row/middleRows methods for a row-major storage. +In addition, sparse matrices expose the `SparseMatrixBase::innerVector()` and `SparseMatrixBase::innerVectors()` methods, which are aliases to the `col`/`middleCols` methods for a column-major storage, and to the `row`/`middleRows` methods for a row-major storage. \subsection TutorialSparse_TriangularSelfadjoint Triangular and selfadjoint views -Just as with dense matrices, the triangularView() function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side: +Just as with dense matrices, the `triangularView()` function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side: \code dm2 = sm1.triangularView(dm1); dv2 = sm1.transpose().triangularView(dv1); \endcode -The selfadjointView() function permits various operations: +The `selfadjointView()` function permits various operations: - optimized sparse-dense matrix products: \code -dm2 = sm1.selfadjointView<>() * dm1; // if all coefficients of A are stored -dm2 = A.selfadjointView() * dm1; // if only the upper part of A is stored -dm2 = A.selfadjointView() * dm1; // if only the lower part of A is stored +dm2 = sm1.selfadjointView<>() * dm1; // if all coefficients of sm1 are stored +dm2 = sm1.selfadjointView() * dm1; // if only the upper part of sm1 is stored +dm2 = sm1.selfadjointView() * dm1; // if only the lower part of sm1 is stored \endcode - copy of triangular parts: \code diff --git a/libs/eigen/doc/UsingBlasLapackBackends.dox b/libs/eigen/doc/UsingBlasLapackBackends.dox index caa5971..c700d85 100644 --- a/libs/eigen/doc/UsingBlasLapackBackends.dox +++ b/libs/eigen/doc/UsingBlasLapackBackends.dox @@ -101,11 +101,17 @@ m1.colPivHouseholderQr(); ?geqp3 \endcode +
OuterStarts:035810\em 12
Singular value decomposition \n \c EIGEN_USE_LAPACKE \code -JacobiSVD svd; -svd.compute(m1, ComputeThinV); +JacobiSVD svd; +svd.compute(m1); \endcode\code ?gesvd \endcode
Singular value decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +BDCSVD svd; +svd.compute(m1); +\endcode\code +?gesdd +\endcode
Eigen-value decompositions \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code EigenSolver es(m1); ComplexEigenSolver ces(m1); diff --git a/libs/eigen/doc/eigen_navtree_hacks.js b/libs/eigen/doc/eigen_navtree_hacks.js index afb97ed..f36b332 100644 --- a/libs/eigen/doc/eigen_navtree_hacks.js +++ b/libs/eigen/doc/eigen_navtree_hacks.js @@ -62,23 +62,161 @@ function getNode(o, po) } } -// Overloaded to adjust the size of the navtree wrt the toc -function resizeHeight() -{ - var header = $("#top"); - var sidenav = $("#side-nav"); - var content = $("#doc-content"); - var navtree = $("#nav-tree"); - var footer = $("#nav-path"); - var toc = $("#nav-toc"); +/* + @licstart The following is the entire license notice for the JavaScript code in this file. - var headerHeight = header.outerHeight(); - var footerHeight = footer.outerHeight(); - var tocHeight = toc.height(); - var windowHeight = $(window).height() - headerHeight - footerHeight; - content.css({height:windowHeight + "px"}); - navtree.css({height:(windowHeight-tocHeight) + "px"}); - sidenav.css({height:windowHeight + "px"}); + The MIT License (MIT) + + Copyright (C) 1997-2020 by Dimitri van Heesch + + Permission is hereby granted, free of charge, to any person obtaining a copy of this software + and associated documentation files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, publish, distribute, + sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all copies or + substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + @licend The above is the entire license notice for the JavaScript code in this file + */ +// We need to override entire resizable just so we can change the height to account for the TOC. 
+function initResizable() +{ + var cookie_namespace = 'doxygen'; + var sidenav,navtree,content,header,collapsed,collapsedWidth=0,barWidth=6,desktop_vp=768,titleHeight; + + function readCookie(cookie) + { + var myCookie = cookie_namespace+"_"+cookie+"="; + if (document.cookie) { + var index = document.cookie.indexOf(myCookie); + if (index != -1) { + var valStart = index + myCookie.length; + var valEnd = document.cookie.indexOf(";", valStart); + if (valEnd == -1) { + valEnd = document.cookie.length; + } + var val = document.cookie.substring(valStart, valEnd); + return val; + } + } + return 0; + } + + function writeCookie(cookie, val, expiration) + { + if (val==undefined) return; + if (expiration == null) { + var date = new Date(); + date.setTime(date.getTime()+(10*365*24*60*60*1000)); // default expiration is one week + expiration = date.toGMTString(); + } + document.cookie = cookie_namespace + "_" + cookie + "=" + val + "; expires=" + expiration+"; path=/"; + } + + function resizeWidth() + { + var windowWidth = $(window).width() + "px"; + var sidenavWidth = $(sidenav).outerWidth(); + content.css({marginLeft:parseInt(sidenavWidth)+"px"}); + writeCookie('width',sidenavWidth-barWidth, null); + } + + function restoreWidth(navWidth) + { + var windowWidth = $(window).width() + "px"; + content.css({marginLeft:parseInt(navWidth)+barWidth+"px"}); + sidenav.css({width:navWidth + "px"}); + } + + function resizeHeight() + { + var headerHeight = header.outerHeight(); + var footerHeight = footer.outerHeight(); + var windowHeight = $(window).height() - headerHeight - footerHeight; + //========================================================================== + // MODIFICATION: + // This small section is the only portion modified within initResizable(). + // The rest is copy-pasted from the doxygen-generated resize.js. + // + // Adjust nav height to make room for TOC. + var toc = $("#nav-toc"); + var tocHeight = toc.height(); + var navHeight = windowHeight; + // tocHeight is not always defined (e.g. 
if empty) + if (tocHeight) { + navHeight = windowHeight - tocHeight; + } + //========================================================================== + + content.css({height:windowHeight + "px"}); + navtree.css({height:navHeight + "px"}); + sidenav.css({height:windowHeight + "px"}); + + var width=$(window).width(); + if (width!=collapsedWidth) { + if (width<desktop_vp && collapsedWidth>=desktop_vp) { + if (!collapsed) { + collapseExpand(); + } + } else if (width>desktop_vp && collapsedWidth<desktop_vp) { + if (collapsed) { + collapseExpand(); + } + } + collapsedWidth=width; + } + } + + function collapseExpand() + { + if (sidenav.width()>0) { + restoreWidth(0); + collapsed=true; + } + else { + var width = readCookie('width'); + if (width>200 && width<$(window).width()) { restoreWidth(width); } else { restoreWidth(200); } + collapsed=false; + } + } + header = $("#top"); + sidenav = $("#side-nav"); + content = $("#doc-content"); + navtree = $("#nav-tree"); + footer = $("#nav-path"); + + $(".side-nav-resizable").resizable({resize: function(e, ui) { resizeWidth(); } }); + $(sidenav).resizable({ minWidth: 0 }); + $(window).resize(function() { resizeHeight(); }); + var device = navigator.userAgent.toLowerCase(); + var touch_device = device.match(/(iphone|ipod|ipad|android)/); + if (touch_device) { /* wider split bar for touch only devices */ + $(sidenav).css({ paddingRight:'20px' }); + $('.ui-resizable-e').css({ width:'20px' }); + $('#nav-sync').css({ right:'34px' }); + barWidth=20; + } + var width = readCookie('width'); + if (width) { restoreWidth(width); } else { resizeWidth(); } + resizeHeight(); + var url = location.href; + var i=url.indexOf("#"); + if (i>=0) window.location.hash=url.substr(i); + var _preventDefault = function(evt) { evt.preventDefault(); }; + $("#splitbar").bind("dragstart", _preventDefault).bind("selectstart", _preventDefault); + $(".ui-resizable-handle").dblclick(collapseExpand); + $(window).on('load',resizeHeight); }
Please, help us to better know about our user community by answering the following short survey: https://forms.gle/wpyrxWi18ox9Z5ae9
-
diff --git a/libs/eigen/doc/examples/CMakeLists.txt b/libs/eigen/doc/examples/CMakeLists.txt index a2c9d05..dd49e3c 100644 --- a/libs/eigen/doc/examples/CMakeLists.txt +++ b/libs/eigen/doc/examples/CMakeLists.txt @@ -14,7 +14,3 @@ foreach(example_src ${examples_SRCS}) ) add_dependencies(all_examples ${example}) endforeach() - -if(EIGEN_COMPILER_SUPPORT_CPP11) -ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11") -endif() \ No newline at end of file diff --git a/libs/eigen/doc/examples/Cwise_erf.cpp b/libs/eigen/doc/examples/Cwise_erf.cpp index e7cd2c1..9ddc57d 100644 --- a/libs/eigen/doc/examples/Cwise_erf.cpp +++ b/libs/eigen/doc/examples/Cwise_erf.cpp @@ -1,9 +1,8 @@ #include #include #include -using namespace Eigen; int main() { - Array4d v(-0.5,2,0,-7); + Eigen::Array4d v(-0.5,2,0,-7); std::cout << v.erf() << std::endl; } diff --git a/libs/eigen/doc/examples/Cwise_erfc.cpp b/libs/eigen/doc/examples/Cwise_erfc.cpp index d8bb04c..4b7902c 100644 --- a/libs/eigen/doc/examples/Cwise_erfc.cpp +++ b/libs/eigen/doc/examples/Cwise_erfc.cpp @@ -1,9 +1,8 @@ #include #include #include -using namespace Eigen; int main() { - Array4d v(-0.5,2,0,-7); + Eigen::Array4d v(-0.5,2,0,-7); std::cout << v.erfc() << std::endl; } diff --git a/libs/eigen/doc/examples/Cwise_lgamma.cpp b/libs/eigen/doc/examples/Cwise_lgamma.cpp index 6bfaccb..f3c9fe6 100644 --- a/libs/eigen/doc/examples/Cwise_lgamma.cpp +++ b/libs/eigen/doc/examples/Cwise_lgamma.cpp @@ -1,9 +1,8 @@ #include #include #include -using namespace Eigen; int main() { - Array4d v(0.5,10,0,-1); + Eigen::Array4d v(0.5,10,0,-1); std::cout << v.lgamma() << std::endl; } diff --git a/libs/eigen/doc/examples/DenseBase_middleCols_int.cpp b/libs/eigen/doc/examples/DenseBase_middleCols_int.cpp index 0ebd955..d05a552 100644 --- a/libs/eigen/doc/examples/DenseBase_middleCols_int.cpp +++ b/libs/eigen/doc/examples/DenseBase_middleCols_int.cpp @@ -1,15 +1,12 @@ #include #include -using namespace Eigen; -using namespace std; - -int main(void) +int main() { int const N = 5; - MatrixXi A(N,N); + Eigen::MatrixXi A(N,N); A.setRandom(); - cout << "A =\n" << A << '\n' << endl; - cout << "A(1..3,:) =\n" << A.middleCols(1,3) << endl; + std::cout << "A =\n" << A << '\n' << std::endl; + std::cout << "A(1..3,:) =\n" << A.middleCols(1,3) << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/DenseBase_middleRows_int.cpp b/libs/eigen/doc/examples/DenseBase_middleRows_int.cpp index a6fe9e8..8651629 100644 --- a/libs/eigen/doc/examples/DenseBase_middleRows_int.cpp +++ b/libs/eigen/doc/examples/DenseBase_middleRows_int.cpp @@ -1,15 +1,12 @@ #include #include -using namespace Eigen; -using namespace std; - -int main(void) +int main() { int const N = 5; - MatrixXi A(N,N); + Eigen::MatrixXi A(N,N); A.setRandom(); - cout << "A =\n" << A << '\n' << endl; - cout << "A(2..3,:) =\n" << A.middleRows(2,2) << endl; + std::cout << "A =\n" << A << '\n' << std::endl; + std::cout << "A(2..3,:) =\n" << A.middleRows(2,2) << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/DenseBase_template_int_middleCols.cpp b/libs/eigen/doc/examples/DenseBase_template_int_middleCols.cpp index 6191d79..caefabf 100644 --- a/libs/eigen/doc/examples/DenseBase_template_int_middleCols.cpp +++ b/libs/eigen/doc/examples/DenseBase_template_int_middleCols.cpp @@ -1,15 +1,12 @@ #include #include -using namespace Eigen; -using namespace std; - -int main(void) +int main() { int const N = 5; - MatrixXi A(N,N); + Eigen::MatrixXi A(N,N); A.setRandom(); - cout << "A =\n" << A << '\n' << endl; - cout 
<< "A(:,1..3) =\n" << A.middleCols<3>(1) << endl; + std::cout << "A =\n" << A << '\n' << std::endl; + std::cout << "A(:,1..3) =\n" << A.middleCols<3>(1) << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/DenseBase_template_int_middleRows.cpp b/libs/eigen/doc/examples/DenseBase_template_int_middleRows.cpp index 7e8b657..ed5b295 100644 --- a/libs/eigen/doc/examples/DenseBase_template_int_middleRows.cpp +++ b/libs/eigen/doc/examples/DenseBase_template_int_middleRows.cpp @@ -1,15 +1,12 @@ #include #include -using namespace Eigen; -using namespace std; - -int main(void) +int main() { int const N = 5; - MatrixXi A(N,N); + Eigen::MatrixXi A(N,N); A.setRandom(); - cout << "A =\n" << A << '\n' << endl; - cout << "A(1..3,:) =\n" << A.middleRows<3>(1) << endl; + std::cout << "A =\n" << A << '\n' << std::endl; + std::cout << "A(1..3,:) =\n" << A.middleRows<3>(1) << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/QuickStart_example2_dynamic.cpp b/libs/eigen/doc/examples/QuickStart_example2_dynamic.cpp index ff6746e..bc8d326 100644 --- a/libs/eigen/doc/examples/QuickStart_example2_dynamic.cpp +++ b/libs/eigen/doc/examples/QuickStart_example2_dynamic.cpp @@ -1,15 +1,15 @@ #include #include -using namespace Eigen; -using namespace std; +using Eigen::MatrixXd; +using Eigen::VectorXd; int main() { MatrixXd m = MatrixXd::Random(3,3); m = (m + MatrixXd::Constant(3,3,1.2)) * 50; - cout << "m =" << endl << m << endl; + std::cout << "m =" << std::endl << m << std::endl; VectorXd v(3); v << 1, 2, 3; - cout << "m * v =" << endl << m * v << endl; + std::cout << "m * v =" << std::endl << m * v << std::endl; } diff --git a/libs/eigen/doc/examples/QuickStart_example2_fixed.cpp b/libs/eigen/doc/examples/QuickStart_example2_fixed.cpp index d911752..af6f9a9 100644 --- a/libs/eigen/doc/examples/QuickStart_example2_fixed.cpp +++ b/libs/eigen/doc/examples/QuickStart_example2_fixed.cpp @@ -1,15 +1,15 @@ #include #include -using namespace Eigen; -using namespace std; +using Eigen::Matrix3d; +using Eigen::Vector3d; int main() { Matrix3d m = Matrix3d::Random(); m = (m + Matrix3d::Constant(1.2)) * 50; - cout << "m =" << endl << m << endl; + std::cout << "m =" << std::endl << m << std::endl; Vector3d v(1,2,3); - cout << "m * v =" << endl << m * v << endl; + std::cout << "m * v =" << std::endl << m * v << std::endl; } diff --git a/libs/eigen/doc/examples/TemplateKeyword_flexible.cpp b/libs/eigen/doc/examples/TemplateKeyword_flexible.cpp index 9d85292..efe458b 100644 --- a/libs/eigen/doc/examples/TemplateKeyword_flexible.cpp +++ b/libs/eigen/doc/examples/TemplateKeyword_flexible.cpp @@ -1,19 +1,17 @@ #include #include -using namespace Eigen; - template -void copyUpperTriangularPart(MatrixBase& dst, const MatrixBase& src) +void copyUpperTriangularPart(Eigen::MatrixBase& dst, const Eigen::MatrixBase& src) { /* Note the 'template' keywords in the following line! 
*/ - dst.template triangularView() = src.template triangularView(); + dst.template triangularView() = src.template triangularView(); } int main() { - MatrixXi m1 = MatrixXi::Ones(5,5); - MatrixXi m2 = MatrixXi::Random(4,4); + Eigen::MatrixXi m1 = Eigen::MatrixXi::Ones(5,5); + Eigen::MatrixXi m2 = Eigen::MatrixXi::Random(4,4); std::cout << "m2 before copy:" << std::endl; std::cout << m2 << std::endl << std::endl; copyUpperTriangularPart(m2, m1.topLeftCorner(4,4)); diff --git a/libs/eigen/doc/examples/TemplateKeyword_simple.cpp b/libs/eigen/doc/examples/TemplateKeyword_simple.cpp index 6998c17..6b946ad 100644 --- a/libs/eigen/doc/examples/TemplateKeyword_simple.cpp +++ b/libs/eigen/doc/examples/TemplateKeyword_simple.cpp @@ -1,11 +1,11 @@ #include #include -using namespace Eigen; +using Eigen::MatrixXf; void copyUpperTriangularPart(MatrixXf& dst, const MatrixXf& src) { - dst.triangularView() = src.triangularView(); + dst.triangularView() = src.triangularView(); } int main() diff --git a/libs/eigen/doc/examples/TutorialInplaceLU.cpp b/libs/eigen/doc/examples/TutorialInplaceLU.cpp index cb9c59b..72bead2 100644 --- a/libs/eigen/doc/examples/TutorialInplaceLU.cpp +++ b/libs/eigen/doc/examples/TutorialInplaceLU.cpp @@ -1,61 +1,57 @@ #include struct init { - init() { std::cout << "[" << "init" << "]" << std::endl; } + init() { std::cout << "[init]\n"; } }; init init_obj; // [init] -#include #include -using namespace std; -using namespace Eigen; - int main() { - MatrixXd A(2,2); + Eigen::MatrixXd A(2,2); A << 2, -1, 1, 3; - cout << "Here is the input matrix A before decomposition:\n" << A << endl; -cout << "[init]" << endl; + std::cout << "Here is the input matrix A before decomposition:\n" << A << "\n"; + std::cout << "[init]\n"; -cout << "[declaration]" << endl; - PartialPivLU > lu(A); - cout << "Here is the input matrix A after decomposition:\n" << A << endl; -cout << "[declaration]" << endl; + std::cout << "[declaration]\n"; + Eigen::PartialPivLU > lu(A); + std::cout << "Here is the input matrix A after decomposition:\n" << A << "\n"; + std::cout << "[declaration]\n"; -cout << "[matrixLU]" << endl; - cout << "Here is the matrix storing the L and U factors:\n" << lu.matrixLU() << endl; -cout << "[matrixLU]" << endl; + std::cout << "[matrixLU]\n"; + std::cout << "Here is the matrix storing the L and U factors:\n" << lu.matrixLU() << "\n"; + std::cout << "[matrixLU]\n"; -cout << "[solve]" << endl; - MatrixXd A0(2,2); A0 << 2, -1, 1, 3; - VectorXd b(2); b << 1, 2; - VectorXd x = lu.solve(b); - cout << "Residual: " << (A0 * x - b).norm() << endl; -cout << "[solve]" << endl; + std::cout << "[solve]\n"; + Eigen::MatrixXd A0(2,2); A0 << 2, -1, 1, 3; + Eigen::VectorXd b(2); b << 1, 2; + Eigen::VectorXd x = lu.solve(b); + std::cout << "Residual: " << (A0 * x - b).norm() << "\n"; + std::cout << "[solve]\n"; -cout << "[modifyA]" << endl; + std::cout << "[modifyA]\n"; A << 3, 4, -2, 1; x = lu.solve(b); - cout << "Residual: " << (A0 * x - b).norm() << endl; -cout << "[modifyA]" << endl; + std::cout << "Residual: " << (A0 * x - b).norm() << "\n"; + std::cout << "[modifyA]\n"; -cout << "[recompute]" << endl; + std::cout << "[recompute]\n"; A0 = A; // save A lu.compute(A); x = lu.solve(b); - cout << "Residual: " << (A0 * x - b).norm() << endl; -cout << "[recompute]" << endl; + std::cout << "Residual: " << (A0 * x - b).norm() << "\n"; + std::cout << "[recompute]\n"; -cout << "[recompute_bis0]" << endl; - MatrixXd A1(2,2); + std::cout << "[recompute_bis0]\n"; + Eigen::MatrixXd A1(2,2); A1 << 5,-2,3,4; 
lu.compute(A1); - cout << "Here is the input matrix A1 after decomposition:\n" << A1 << endl; -cout << "[recompute_bis0]" << endl; + std::cout << "Here is the input matrix A1 after decomposition:\n" << A1 << "\n"; + std::cout << "[recompute_bis0]\n"; -cout << "[recompute_bis1]" << endl; + std::cout << "[recompute_bis1]\n"; x = lu.solve(b); - cout << "Residual: " << (A1 * x - b).norm() << endl; -cout << "[recompute_bis1]" << endl; + std::cout << "Residual: " << (A1 * x - b).norm() << "\n"; + std::cout << "[recompute_bis1]\n"; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp b/libs/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp index 06ba646..a561f08 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp @@ -1,23 +1,20 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix2f A, b; - LLT llt; + Eigen::Matrix2f A, b; + Eigen::LLT llt; A << 2, -1, -1, 3; b << 1, 2, 3, 1; - cout << "Here is the matrix A:\n" << A << endl; - cout << "Here is the right hand side b:\n" << b << endl; - cout << "Computing LLT decomposition..." << endl; + std::cout << "Here is the matrix A:\n" << A << std::endl; + std::cout << "Here is the right hand side b:\n" << b << std::endl; + std::cout << "Computing LLT decomposition..." << std::endl; llt.compute(A); - cout << "The solution is:\n" << llt.solve(b) << endl; + std::cout << "The solution is:\n" << llt.solve(b) << std::endl; A(1,1)++; - cout << "The matrix A is now:\n" << A << endl; - cout << "Computing LLT decomposition..." << endl; + std::cout << "The matrix A is now:\n" << A << std::endl; + std::cout << "Computing LLT decomposition..." << std::endl; llt.compute(A); - cout << "The solution is now:\n" << llt.solve(b) << endl; + std::cout << "The solution is now:\n" << llt.solve(b) << std::endl; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp b/libs/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp index f362fb7..199f3f5 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp @@ -1,8 +1,7 @@ #include #include -using namespace std; -using namespace Eigen; +using Eigen::MatrixXd; int main() { @@ -10,5 +9,5 @@ int main() MatrixXd b = MatrixXd::Random(100,50); MatrixXd x = A.fullPivLu().solve(b); double relative_error = (A*x - b).norm() / b.norm(); // norm() is L2 norm - cout << "The relative error is:\n" << relative_error << endl; + std::cout << "The relative error is:\n" << relative_error << std::endl; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp b/libs/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp index 3a99a94..5ee6b6a 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp @@ -1,17 +1,14 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix3f A; - Vector3f b; + Eigen::Matrix3f A; + Eigen::Vector3f b; A << 1,2,3, 4,5,6, 7,8,10; b << 3, 3, 4; - cout << "Here is the matrix A:\n" << A << endl; - cout << "Here is the vector b:\n" << b << endl; - Vector3f x = A.colPivHouseholderQr().solve(b); - cout << "The solution is:\n" << x << endl; + std::cout << "Here is the matrix A:\n" << A << std::endl; + std::cout << "Here is the vector b:\n" << b << std::endl; + Eigen::Vector3f x = A.colPivHouseholderQr().solve(b); + std::cout << "The 
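TutorialInplaceLU above demonstrates in-place decomposition; a condensed, self-contained sketch of the idiom, assuming the documented PartialPivLU-over-Ref form:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A(2, 2);
  A << 2, -1,
       1,  3;
  // Factor A in place: the decomposition holds an Eigen::Ref, so A's own
  // storage is overwritten with the packed L and U factors.
  Eigen::PartialPivLU<Eigen::Ref<Eigen::MatrixXd>> lu(A);
  Eigen::VectorXd b(2);
  b << 1, 2;
  std::cout << "x = " << lu.solve(b).transpose() << "\n";
}
```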
solution is:\n" << x << std::endl; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp b/libs/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp index f8beacd..82186d4 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp @@ -1,16 +1,13 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix2f A, b; + Eigen::Matrix2f A, b; A << 2, -1, -1, 3; b << 1, 2, 3, 1; - cout << "Here is the matrix A:\n" << A << endl; - cout << "Here is the right hand side b:\n" << b << endl; - Matrix2f x = A.ldlt().solve(b); - cout << "The solution is:\n" << x << endl; + std::cout << "Here is the matrix A:\n" << A << std::endl; + std::cout << "Here is the right hand side b:\n" << b << std::endl; + Eigen::Matrix2f x = A.ldlt().solve(b); + std::cout << "The solution is:\n" << x << std::endl; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp b/libs/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp index 14dde5b..b31a92a 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp @@ -1,16 +1,13 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix3f A; + Eigen::Matrix3f A; A << 1, 2, 1, 2, 1, 0, -1, 1, 2; - cout << "Here is the matrix A:\n" << A << endl; - cout << "The determinant of A is " << A.determinant() << endl; - cout << "The inverse of A is:\n" << A.inverse() << endl; + std::cout << "Here is the matrix A:\n" << A << std::endl; + std::cout << "The determinant of A is " << A.determinant() << std::endl; + std::cout << "The inverse of A is:\n" << A.inverse() << std::endl; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp b/libs/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp index c516507..fea52ab 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp @@ -1,20 +1,17 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix3f A; + Eigen::Matrix3f A; A << 1, 2, 5, 2, 1, 4, 3, 0, 3; - cout << "Here is the matrix A:\n" << A << endl; - FullPivLU lu_decomp(A); - cout << "The rank of A is " << lu_decomp.rank() << endl; - cout << "Here is a matrix whose columns form a basis of the null-space of A:\n" - << lu_decomp.kernel() << endl; - cout << "Here is a matrix whose columns form a basis of the column-space of A:\n" - << lu_decomp.image(A) << endl; // yes, have to pass the original A + std::cout << "Here is the matrix A:\n" << A << std::endl; + Eigen::FullPivLU lu_decomp(A); + std::cout << "The rank of A is " << lu_decomp.rank() << std::endl; + std::cout << "Here is a matrix whose columns form a basis of the null-space of A:\n" + << lu_decomp.kernel() << std::endl; + std::cout << "Here is a matrix whose columns form a basis of the column-space of A:\n" + << lu_decomp.image(A) << std::endl; // yes, have to pass the original A } diff --git a/libs/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp b/libs/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp index f109f04..04cbe1b 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp @@ -1,15 +1,12 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - MatrixXf A = MatrixXf::Random(3, 2); - cout << "Here is the matrix A:\n" << A << endl; - VectorXf b = VectorXf::Random(3); - cout << "Here is the 
right hand side b:\n" << b << endl; - cout << "The least-squares solution is:\n" - << A.bdcSvd(ComputeThinU | ComputeThinV).solve(b) << endl; + Eigen::MatrixXf A = Eigen::MatrixXf::Random(3, 2); + std::cout << "Here is the matrix A:\n" << A << std::endl; + Eigen::VectorXf b = Eigen::VectorXf::Random(3); + std::cout << "Here is the right hand side b:\n" << b << std::endl; + std::cout << "The least-squares solution is:\n" + << A.template bdcSvd().solve(b) << std::endl; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp b/libs/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp index 8d1d1ed..fcf2f33 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp @@ -1,18 +1,15 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix2f A; + Eigen::Matrix2f A; A << 1, 2, 2, 3; - cout << "Here is the matrix A:\n" << A << endl; - SelfAdjointEigenSolver eigensolver(A); - if (eigensolver.info() != Success) abort(); - cout << "The eigenvalues of A are:\n" << eigensolver.eigenvalues() << endl; - cout << "Here's a matrix whose columns are eigenvectors of A \n" + std::cout << "Here is the matrix A:\n" << A << std::endl; + Eigen::SelfAdjointEigenSolver eigensolver(A); + if (eigensolver.info() != Eigen::Success) abort(); + std::cout << "The eigenvalues of A are:\n" << eigensolver.eigenvalues() << std::endl; + std::cout << "Here's a matrix whose columns are eigenvectors of A \n" << "corresponding to these eigenvalues:\n" - << eigensolver.eigenvectors() << endl; + << eigensolver.eigenvectors() << std::endl; } diff --git a/libs/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp b/libs/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp index 3956b13..e1335e7 100644 --- a/libs/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp +++ b/libs/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp @@ -1,16 +1,13 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix2d A; + Eigen::Matrix2d A; A << 2, 1, 2, 0.9999999999; - FullPivLU lu(A); - cout << "By default, the rank of A is found to be " << lu.rank() << endl; + Eigen::FullPivLU lu(A); + std::cout << "By default, the rank of A is found to be " << lu.rank() << std::endl; lu.setThreshold(1e-5); - cout << "With threshold 1e-5, the rank of A is found to be " << lu.rank() << endl; + std::cout << "With threshold 1e-5, the rank of A is found to be " << lu.rank() << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp b/libs/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp index dc720ff..0db52a3 100644 --- a/libs/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp +++ b/libs/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp @@ -1,24 +1,21 @@ #include #include -using namespace Eigen; -using namespace std; - int main() { - ArrayXXf m(2,2); + Eigen::ArrayXXf m(2,2); // assign some values coefficient by coefficient m(0,0) = 1.0; m(0,1) = 2.0; m(1,0) = 3.0; m(1,1) = m(0,1) + m(1,0); // print values to standard output - cout << m << endl << endl; + std::cout << m << std::endl << std::endl; // using the comma-initializer is also allowed m << 1.0,2.0, 3.0,4.0; // print values to standard output - cout << m << endl; + std::cout << m << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp b/libs/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp index 480ffb0..4a407a7 100644 --- 
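The least-squares example above now requests the thin U and V factors through the decomposition's template options rather than a runtime argument. A hedged sketch of that calling convention, assuming the post-3.4 API this patch targets:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXf A = Eigen::MatrixXf::Random(3, 2);
  Eigen::VectorXf b = Eigen::VectorXf::Random(3);
  // ComputeThinU/ComputeThinV move from a constructor argument into the
  // template parameter list; solve() then gives the least-squares solution.
  Eigen::VectorXf x =
      A.bdcSvd<Eigen::ComputeThinU | Eigen::ComputeThinV>().solve(b);
  std::cout << "least-squares solution:\n" << x << "\n";
}
```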
a/libs/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp +++ b/libs/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp @@ -1,13 +1,10 @@ #include #include -using namespace Eigen; -using namespace std; - int main() { - ArrayXXf a(3,3); - ArrayXXf b(3,3); + Eigen::ArrayXXf a(3,3); + Eigen::ArrayXXf b(3,3); a << 1,2,3, 4,5,6, 7,8,9; @@ -16,8 +13,8 @@ int main() 1,2,3; // Adding two arrays - cout << "a + b = " << endl << a + b << endl << endl; + std::cout << "a + b = " << std::endl << a + b << std::endl << std::endl; // Subtracting a scalar from an array - cout << "a - 2 = " << endl << a - 2 << endl; + std::cout << "a - 2 = " << std::endl << a - 2 << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp b/libs/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp index d9046c6..12483f3 100644 --- a/libs/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp +++ b/libs/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp @@ -1,19 +1,16 @@ #include #include -using namespace Eigen; -using namespace std; - int main() { - ArrayXf a = ArrayXf::Random(5); + Eigen::ArrayXf a = Eigen::ArrayXf::Random(5); a *= 2; - cout << "a =" << endl - << a << endl; - cout << "a.abs() =" << endl - << a.abs() << endl; - cout << "a.abs().sqrt() =" << endl - << a.abs().sqrt() << endl; - cout << "a.min(a.abs().sqrt()) =" << endl - << a.min(a.abs().sqrt()) << endl; + std::cout << "a =" << std::endl + << a << std::endl; + std::cout << "a.abs() =" << std::endl + << a.abs() << std::endl; + std::cout << "a.abs().sqrt() =" << std::endl + << a.abs().sqrt() << std::endl; + std::cout << "a.min(a.abs().sqrt()) =" << std::endl + << a.min(a.abs().sqrt()) << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp b/libs/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp index 371f070..c9a8352 100644 --- a/libs/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp +++ b/libs/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp @@ -1,8 +1,7 @@ #include #include -using namespace Eigen; -using namespace std; +using Eigen::MatrixXf; int main() { @@ -16,7 +15,7 @@ int main() 7,8; result = (m.array() + 4).matrix() * m; - cout << "-- Combination 1: --" << endl << result << endl << endl; + std::cout << "-- Combination 1: --\n" << result << "\n\n"; result = (m.array() * n.array()).matrix() * m; - cout << "-- Combination 2: --" << endl << result << endl << endl; + std::cout << "-- Combination 2: --\n" << result << "\n\n"; } diff --git a/libs/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp b/libs/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp index 1014275..07ec9b0 100644 --- a/libs/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp +++ b/libs/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp @@ -1,8 +1,7 @@ #include #include -using namespace Eigen; -using namespace std; +using Eigen::MatrixXf; int main() { @@ -16,11 +15,11 @@ int main() 7,8; result = m * n; - cout << "-- Matrix m*n: --" << endl << result << endl << endl; + std::cout << "-- Matrix m*n: --\n" << result << "\n\n"; result = m.array() * n.array(); - cout << "-- Array m*n: --" << endl << result << endl << endl; + std::cout << "-- Array m*n: --\n" << result << "\n\n"; result = m.cwiseProduct(n); - cout << "-- With cwiseProduct: --" << endl << result << endl << endl; + std::cout << "-- With cwiseProduct: --\n" << result << "\n\n"; result = m.array() + 4; - cout << "-- Array m + 4: --" << endl << result << endl << endl; + std::cout << "-- Array m + 4: --\n" << result << "\n\n"; } diff --git 
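The interop examples above mix array and matrix worlds; a compact sketch of the equivalent spellings of a coefficient-wise product:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXf m(2, 2), n(2, 2);
  m << 1, 2,
       3, 4;
  n << 5, 6,
       7, 8;
  // Coefficient-wise product, two equivalent spellings; only plain
  // operator* on matrices performs a true matrix product.
  std::cout << (m.array() * n.array()).matrix() << "\n\n";
  std::cout << m.cwiseProduct(n) << "\n";
}
```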
a/libs/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp b/libs/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp index 6cb439f..bada36c 100644 --- a/libs/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp +++ b/libs/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp @@ -1,16 +1,13 @@ #include #include -using namespace Eigen; -using namespace std; - int main() { - ArrayXXf a(2,2); - ArrayXXf b(2,2); + Eigen::ArrayXXf a(2,2); + Eigen::ArrayXXf b(2,2); a << 1,2, 3,4; b << 5,6, 7,8; - cout << "a * b = " << endl << a * b << endl; + std::cout << "a * b = " << std::endl << a * b << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp b/libs/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp index 0b87313..26ad478 100644 --- a/libs/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp +++ b/libs/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp @@ -1,18 +1,15 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Array22f m; + Eigen::Array22f m; m << 1,2, 3,4; - Array44f a = Array44f::Constant(0.6); - cout << "Here is the array a:" << endl << a << endl << endl; + Eigen::Array44f a = Eigen::Array44f::Constant(0.6); + std::cout << "Here is the array a:\n" << a << "\n\n"; a.block<2,2>(1,1) = m; - cout << "Here is now a with m copied into its central 2x2 block:" << endl << a << endl << endl; + std::cout << "Here is now a with m copied into its central 2x2 block:\n" << a << "\n\n"; a.block(0,0,2,3) = a.block(2,1,2,3); - cout << "Here is now a with bottom-right 2x3 block copied into top-left 2x3 block:" << endl << a << endl << endl; + std::cout << "Here is now a with bottom-right 2x3 block copied into top-left 2x3 block:\n" << a << "\n\n"; } diff --git a/libs/eigen/doc/examples/Tutorial_PartialLU_solve.cpp b/libs/eigen/doc/examples/Tutorial_PartialLU_solve.cpp index a560879..ca72c99 100644 --- a/libs/eigen/doc/examples/Tutorial_PartialLU_solve.cpp +++ b/libs/eigen/doc/examples/Tutorial_PartialLU_solve.cpp @@ -2,17 +2,14 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - Matrix3f A; - Vector3f b; + Eigen::Matrix3f A; + Eigen::Vector3f b; A << 1,2,3, 4,5,6, 7,8,10; b << 3, 3, 4; - cout << "Here is the matrix A:" << endl << A << endl; - cout << "Here is the vector b:" << endl << b << endl; - Vector3f x = A.lu().solve(b); - cout << "The solution is:" << endl << x << endl; + std::cout << "Here is the matrix A:" << std::endl << A << std::endl; + std::cout << "Here is the vector b:" << std::endl << b << std::endl; + Eigen::Vector3f x = A.lu().solve(b); + std::cout << "The solution is:" << std::endl << x << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp index 334b4d8..8ef06be 100644 --- a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp +++ b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp @@ -1,9 +1,6 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { Eigen::MatrixXf m(2,4); @@ -15,10 +12,10 @@ int main() v << 2, 3; - MatrixXf::Index index; + Eigen::Index index; // find nearest neighbour (m.colwise() - v).colwise().squaredNorm().minCoeff(&index); - cout << "Nearest neighbour is column " << index << ":" << endl; - cout << m.col(index) << endl; + std::cout << "Nearest neighbour is column " << index << ":" << std::endl; + std::cout << 
m.col(index) << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp index 049c747..b5d88c3 100644 --- a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp +++ b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp @@ -1,15 +1,13 @@ #include #include -using namespace std; -using namespace Eigen; int main() { - MatrixXf mat(2,4); + Eigen::MatrixXf mat(2,4); mat << 1, 2, 6, 9, 3, 1, 7, 2; - MatrixXf::Index maxIndex; + Eigen::Index maxIndex; float maxNorm = mat.colwise().sum().maxCoeff(&maxIndex); std::cout << "Maximum sum at position " << maxIndex << std::endl; diff --git a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp index 0cca37f..7b89bcf 100644 --- a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp +++ b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp @@ -1,21 +1,18 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - ArrayXXf a(2,2); + Eigen::ArrayXXf a(2,2); a << 1,2, 3,4; - cout << "(a > 0).all() = " << (a > 0).all() << endl; - cout << "(a > 0).any() = " << (a > 0).any() << endl; - cout << "(a > 0).count() = " << (a > 0).count() << endl; - cout << endl; - cout << "(a > 2).all() = " << (a > 2).all() << endl; - cout << "(a > 2).any() = " << (a > 2).any() << endl; - cout << "(a > 2).count() = " << (a > 2).count() << endl; + std::cout << "(a > 0).all() = " << (a > 0).all() << std::endl; + std::cout << "(a > 0).any() = " << (a > 0).any() << std::endl; + std::cout << "(a > 0).count() = " << (a > 0).count() << std::endl; + std::cout << std::endl; + std::cout << "(a > 2).all() = " << (a > 2).all() << std::endl; + std::cout << "(a > 2).any() = " << (a > 2).any() << std::endl; + std::cout << "(a > 2).count() = " << (a > 2).count() << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp index 740439f..7519137 100644 --- a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp +++ b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp @@ -1,13 +1,10 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { - VectorXf v(2); - MatrixXf m(2,2), n(2,2); + Eigen::VectorXf v(2); + Eigen::MatrixXf m(2,2), n(2,2); v << -1, 2; @@ -15,14 +12,14 @@ int main() m << 1,-2, -3,4; - cout << "v.squaredNorm() = " << v.squaredNorm() << endl; - cout << "v.norm() = " << v.norm() << endl; - cout << "v.lpNorm<1>() = " << v.lpNorm<1>() << endl; - cout << "v.lpNorm() = " << v.lpNorm() << endl; + std::cout << "v.squaredNorm() = " << v.squaredNorm() << std::endl; + std::cout << "v.norm() = " << v.norm() << std::endl; + std::cout << "v.lpNorm<1>() = " << v.lpNorm<1>() << std::endl; + std::cout << "v.lpNorm() = " << v.lpNorm() << std::endl; - cout << endl; - cout << "m.squaredNorm() = " << m.squaredNorm() << endl; - cout << "m.norm() = " << m.norm() << endl; - cout << "m.lpNorm<1>() = " << m.lpNorm<1>() << endl; - cout << "m.lpNorm() = " << m.lpNorm() << endl; + std::cout << std::endl; + std::cout << "m.squaredNorm() = " << m.squaredNorm() << std::endl; + std::cout << "m.norm() = " << m.norm() << std::endl; + 
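The reductions_norm tutorial above walks through the lpNorm family; the infinity norm is selected with the Eigen::Infinity template argument, as in this minimal sketch:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::VectorXf v(2);
  v << -1, 2;
  std::cout << v.norm() << "\n";                     // Euclidean norm: sqrt(5)
  std::cout << v.lpNorm<1>() << "\n";                // |-1| + |2| = 3
  std::cout << v.lpNorm<Eigen::Infinity>() << "\n";  // max(|-1|, |2|) = 2
}
```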
std::cout << "m.lpNorm<1>() = " << m.lpNorm<1>() << std::endl; + std::cout << "m.lpNorm() = " << m.lpNorm() << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp index 62e28fc..8faa5a1 100644 --- a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp +++ b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp @@ -1,18 +1,15 @@ #include #include -using namespace Eigen; -using namespace std; - int main() { - MatrixXf m(2,2); + Eigen::MatrixXf m(2,2); m << 1,-2, -3,4; - cout << "1-norm(m) = " << m.cwiseAbs().colwise().sum().maxCoeff() - << " == " << m.colwise().lpNorm<1>().maxCoeff() << endl; + std::cout << "1-norm(m) = " << m.cwiseAbs().colwise().sum().maxCoeff() + << " == " << m.colwise().lpNorm<1>().maxCoeff() << std::endl; - cout << "infty-norm(m) = " << m.cwiseAbs().rowwise().sum().maxCoeff() - << " == " << m.rowwise().lpNorm<1>().maxCoeff() << endl; + std::cout << "infty-norm(m) = " << m.cwiseAbs().rowwise().sum().maxCoeff() + << " == " << m.rowwise().lpNorm<1>().maxCoeff() << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp index b54e9aa..bd294bd 100644 --- a/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp +++ b/libs/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp @@ -1,9 +1,6 @@ #include #include -using namespace std; -using namespace Eigen; - int main() { Eigen::MatrixXf m(2,2); @@ -12,15 +9,15 @@ int main() 3, 4; //get location of maximum - MatrixXf::Index maxRow, maxCol; + Eigen::Index maxRow, maxCol; float max = m.maxCoeff(&maxRow, &maxCol); //get location of minimum - MatrixXf::Index minRow, minCol; + Eigen::Index minRow, minCol; float min = m.minCoeff(&minRow, &minCol); - cout << "Max: " << max << ", at: " << - maxRow << "," << maxCol << endl; - cout << "Min: " << min << ", at: " << - minRow << "," << minCol << endl; + std::cout << "Max: " << max << ", at: " << + maxRow << "," << maxCol << std::endl; + std:: cout << "Min: " << min << ", at: " << + minRow << "," << minCol << std::endl; } diff --git a/libs/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp b/libs/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp index defcb1e..796bd87 100644 --- a/libs/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp +++ b/libs/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp @@ -1,13 +1,11 @@ #include #include -using namespace Eigen; - int main() { for (int size=1; size<=4; ++size) { - MatrixXi m(size,size+1); // a (size)x(size+1)-matrix of int's + Eigen::MatrixXi m(size,size+1); // a (size)x(size+1)-matrix of int's for (int j=0; j #include -using namespace Eigen; - int main() { - Matrix3f m3; + Eigen::Matrix3f m3; m3 << 1, 2, 3, 4, 5, 6, 7, 8, 9; - Matrix4f m4 = Matrix4f::Identity(); - Vector4i v4(1, 2, 3, 4); + Eigen::Matrix4f m4 = Eigen::Matrix4f::Identity(); + Eigen::Vector4i v4(1, 2, 3, 4); std::cout << "m3\n" << m3 << "\nm4:\n" << m4 << "\nv4:\n" << v4 << std::endl; diff --git a/libs/eigen/doc/examples/class_Block.cpp b/libs/eigen/doc/examples/class_Block.cpp index ace719a..9ace0da 100644 --- a/libs/eigen/doc/examples/class_Block.cpp +++ b/libs/eigen/doc/examples/class_Block.cpp @@ -1,27 +1,25 @@ #include #include -using namespace 
Eigen; -using namespace std; template Eigen::Block -topLeftCorner(MatrixBase& m, int rows, int cols) +topLeftCorner(Eigen::MatrixBase& m, int rows, int cols) { return Eigen::Block(m.derived(), 0, 0, rows, cols); } template const Eigen::Block -topLeftCorner(const MatrixBase& m, int rows, int cols) +topLeftCorner(const Eigen::MatrixBase& m, int rows, int cols) { return Eigen::Block(m.derived(), 0, 0, rows, cols); } int main(int, char**) { - Matrix4d m = Matrix4d::Identity(); - cout << topLeftCorner(4*m, 2, 3) << endl; // calls the const version + Eigen::Matrix4d m = Eigen::Matrix4d::Identity(); + std::cout << topLeftCorner(4*m, 2, 3) << std::endl; // calls the const version topLeftCorner(m, 2, 3) *= 5; // calls the non-const version - cout << "Now the matrix m is:" << endl << m << endl; + std::cout << "Now the matrix m is:" << std::endl << m << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/class_CwiseBinaryOp.cpp b/libs/eigen/doc/examples/class_CwiseBinaryOp.cpp index 682af46..973befd 100644 --- a/libs/eigen/doc/examples/class_CwiseBinaryOp.cpp +++ b/libs/eigen/doc/examples/class_CwiseBinaryOp.cpp @@ -1,18 +1,17 @@ #include #include -using namespace Eigen; -using namespace std; + +using Eigen::Matrix4d; // define a custom template binary functor template struct MakeComplexOp { - EIGEN_EMPTY_STRUCT_CTOR(MakeComplexOp) - typedef complex result_type; - complex operator()(const Scalar& a, const Scalar& b) const { return complex(a,b); } + typedef std::complex result_type; + result_type operator()(const Scalar& a, const Scalar& b) const { return result_type(a,b); } }; int main(int, char**) { Matrix4d m1 = Matrix4d::Random(), m2 = Matrix4d::Random(); - cout << m1.binaryExpr(m2, MakeComplexOp()) << endl; + std::cout << m1.binaryExpr(m2, MakeComplexOp()) << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/class_CwiseUnaryOp.cpp b/libs/eigen/doc/examples/class_CwiseUnaryOp.cpp index a5fcc15..6c65f2e 100644 --- a/libs/eigen/doc/examples/class_CwiseUnaryOp.cpp +++ b/libs/eigen/doc/examples/class_CwiseUnaryOp.cpp @@ -1,7 +1,5 @@ #include #include -using namespace Eigen; -using namespace std; // define a custom template unary functor template @@ -13,7 +11,7 @@ struct CwiseClampOp { int main(int, char**) { - Matrix4d m1 = Matrix4d::Random(); - cout << m1 << endl << "becomes: " << endl << m1.unaryExpr(CwiseClampOp(-0.5,0.5)) << endl; + Eigen::Matrix4d m1 = Eigen::Matrix4d::Random(); + std::cout << m1 << std::endl << "becomes: " << std::endl << m1.unaryExpr(CwiseClampOp(-0.5,0.5)) << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp b/libs/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp index 36706d8..e97095e 100644 --- a/libs/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp +++ b/libs/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp @@ -1,7 +1,5 @@ #include #include -using namespace Eigen; -using namespace std; // define function to be applied coefficient-wise double ramp(double x) @@ -14,7 +12,7 @@ double ramp(double x) int main(int, char**) { - Matrix4d m1 = Matrix4d::Random(); - cout << m1 << endl << "becomes: " << endl << m1.unaryExpr(ptr_fun(ramp)) << endl; + Eigen::Matrix4d m1 = Eigen::Matrix4d::Random(); + std::cout << m1 << std::endl << "becomes: " << std::endl << m1.unaryExpr(std::ptr_fun(ramp)) << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/class_FixedBlock.cpp b/libs/eigen/doc/examples/class_FixedBlock.cpp index 9978b32..4bb2d44 100644 --- a/libs/eigen/doc/examples/class_FixedBlock.cpp +++ 
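class_CwiseUnaryOp_ptrfun.cpp above still goes through std::ptr_fun, which C++11 deprecated and C++17 removed. unaryExpr accepts any callable, so a lambda (not part of the patch) sidesteps the dependency entirely:

```cpp
#include <Eigen/Dense>
#include <iostream>

// Same coefficient-wise function as the example above.
double ramp(double x) { return x > 0 ? x : 0; }

int main() {
  Eigen::Matrix4d m1 = Eigen::Matrix4d::Random();
  // A lambda (or the raw function pointer) replaces std::ptr_fun(ramp)
  // and keeps the example valid under C++17 and later.
  std::cout << m1.unaryExpr([](double x) { return ramp(x); }) << std::endl;
}
```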
b/libs/eigen/doc/examples/class_FixedBlock.cpp @@ -1,27 +1,25 @@ #include #include -using namespace Eigen; -using namespace std; template Eigen::Block -topLeft2x2Corner(MatrixBase& m) +topLeft2x2Corner(Eigen::MatrixBase& m) { return Eigen::Block(m.derived(), 0, 0); } template const Eigen::Block -topLeft2x2Corner(const MatrixBase& m) +topLeft2x2Corner(const Eigen::MatrixBase& m) { return Eigen::Block(m.derived(), 0, 0); } int main(int, char**) { - Matrix3d m = Matrix3d::Identity(); - cout << topLeft2x2Corner(4*m) << endl; // calls the const version + Eigen::Matrix3d m = Eigen::Matrix3d::Identity(); + std::cout << topLeft2x2Corner(4*m) << std::endl; // calls the const version topLeft2x2Corner(m) *= 2; // calls the non-const version - cout << "Now the matrix m is:" << endl << m << endl; + std::cout << "Now the matrix m is:" << std::endl << m << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/class_FixedReshaped.cpp b/libs/eigen/doc/examples/class_FixedReshaped.cpp index b6d4085..be7069d 100644 --- a/libs/eigen/doc/examples/class_FixedReshaped.cpp +++ b/libs/eigen/doc/examples/class_FixedReshaped.cpp @@ -1,22 +1,20 @@ #include #include -using namespace Eigen; -using namespace std; template Eigen::Reshaped -reshape_helper(MatrixBase& m) +reshape_helper(Eigen::MatrixBase& m) { return Eigen::Reshaped(m.derived()); } int main(int, char**) { - MatrixXd m(2, 4); + Eigen::MatrixXd m(2, 4); m << 1, 2, 3, 4, 5, 6, 7, 8; - MatrixXd n = reshape_helper(m); - cout << "matrix m is:" << endl << m << endl; - cout << "matrix n is:" << endl << n << endl; + Eigen::MatrixXd n = reshape_helper(m); + std::cout << "matrix m is:" << std::endl << m << std::endl; + std::cout << "matrix n is:" << std::endl << n << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/class_FixedVectorBlock.cpp b/libs/eigen/doc/examples/class_FixedVectorBlock.cpp index c88c9fb..eed3007 100644 --- a/libs/eigen/doc/examples/class_FixedVectorBlock.cpp +++ b/libs/eigen/doc/examples/class_FixedVectorBlock.cpp @@ -1,27 +1,25 @@ #include #include -using namespace Eigen; -using namespace std; template Eigen::VectorBlock -firstTwo(MatrixBase& v) +firstTwo(Eigen::MatrixBase& v) { return Eigen::VectorBlock(v.derived(), 0); } template const Eigen::VectorBlock -firstTwo(const MatrixBase& v) +firstTwo(const Eigen::MatrixBase& v) { return Eigen::VectorBlock(v.derived(), 0); } int main(int, char**) { - Matrix v; v << 1,2,3,4,5,6; - cout << firstTwo(4*v) << endl; // calls the const version + Eigen::Matrix v; v << 1,2,3,4,5,6; + std::cout << firstTwo(4*v) << std::endl; // calls the const version firstTwo(v) *= 2; // calls the non-const version - cout << "Now the vector v is:" << endl << v << endl; + std::cout << "Now the vector v is:" << std::endl << v << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/class_Reshaped.cpp b/libs/eigen/doc/examples/class_Reshaped.cpp index 18fb454..7219853 100644 --- a/libs/eigen/doc/examples/class_Reshaped.cpp +++ b/libs/eigen/doc/examples/class_Reshaped.cpp @@ -1,23 +1,21 @@ #include #include -using namespace std; -using namespace Eigen; template -const Reshaped -reshape_helper(const MatrixBase& m, int rows, int cols) +const Eigen::Reshaped +reshape_helper(const Eigen::MatrixBase& m, int rows, int cols) { - return Reshaped(m.derived(), rows, cols); + return Eigen::Reshaped(m.derived(), rows, cols); } int main(int, char**) { - MatrixXd m(3, 4); + Eigen::MatrixXd m(3, 4); m << 1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12; - cout << m << endl; - Ref n = reshape_helper(m, 2, 6); - cout << "Matrix m is:" 
<< endl << m << endl; - cout << "Matrix n is:" << endl << n << endl; + std::cout << m << std::endl; + Eigen::Ref n = reshape_helper(m, 2, 6); + std::cout << "Matrix m is:" << std::endl << m << std::endl; + std::cout << "Matrix n is:" << std::endl << n << std::endl; } diff --git a/libs/eigen/doc/examples/class_VectorBlock.cpp b/libs/eigen/doc/examples/class_VectorBlock.cpp index dc213df..5cee147 100644 --- a/libs/eigen/doc/examples/class_VectorBlock.cpp +++ b/libs/eigen/doc/examples/class_VectorBlock.cpp @@ -1,27 +1,25 @@ #include #include -using namespace Eigen; -using namespace std; template Eigen::VectorBlock -segmentFromRange(MatrixBase& v, int start, int end) +segmentFromRange(Eigen::MatrixBase& v, int start, int end) { return Eigen::VectorBlock(v.derived(), start, end-start); } template const Eigen::VectorBlock -segmentFromRange(const MatrixBase& v, int start, int end) +segmentFromRange(const Eigen::MatrixBase& v, int start, int end) { return Eigen::VectorBlock(v.derived(), start, end-start); } int main(int, char**) { - Matrix v; v << 1,2,3,4,5,6; - cout << segmentFromRange(2*v, 2, 4) << endl; // calls the const version + Eigen::Matrix v; v << 1,2,3,4,5,6; + std::cout << segmentFromRange(2*v, 2, 4) << std::endl; // calls the const version segmentFromRange(v, 1, 3) *= 5; // calls the non-const version - cout << "Now the vector v is:" << endl << v << endl; + std::cout << "Now the vector v is:" << std::endl << v << std::endl; return 0; } diff --git a/libs/eigen/doc/examples/function_taking_eigenbase.cpp b/libs/eigen/doc/examples/function_taking_eigenbase.cpp index 49d94b3..4e1e5a9 100644 --- a/libs/eigen/doc/examples/function_taking_eigenbase.cpp +++ b/libs/eigen/doc/examples/function_taking_eigenbase.cpp @@ -1,9 +1,8 @@ #include #include -using namespace Eigen; template -void print_size(const EigenBase& b) +void print_size(const Eigen::EigenBase& b) { std::cout << "size (rows, cols): " << b.size() << " (" << b.rows() << ", " << b.cols() << ")" << std::endl; @@ -11,7 +10,7 @@ void print_size(const EigenBase& b) int main() { - Vector3f v; + Eigen::Vector3f v; print_size(v); // v.asDiagonal() returns a 3x3 diagonal matrix pseudo-expression print_size(v.asDiagonal()); diff --git a/libs/eigen/doc/examples/function_taking_ref.cpp b/libs/eigen/doc/examples/function_taking_ref.cpp index 162a202..a837e19 100644 --- a/libs/eigen/doc/examples/function_taking_ref.cpp +++ b/libs/eigen/doc/examples/function_taking_ref.cpp @@ -1,19 +1,17 @@ #include #include -using namespace Eigen; -using namespace std; -float inv_cond(const Ref& a) +float inv_cond(const Eigen::Ref& a) { - const VectorXf sing_vals = a.jacobiSvd().singularValues(); + const Eigen::VectorXf sing_vals = a.jacobiSvd().singularValues(); return sing_vals(sing_vals.size()-1) / sing_vals(0); } int main() { - Matrix4f m = Matrix4f::Random(); - cout << "matrix m:" << endl << m << endl << endl; - cout << "inv_cond(m): " << inv_cond(m) << endl; - cout << "inv_cond(m(1:3,1:3)): " << inv_cond(m.topLeftCorner(3,3)) << endl; - cout << "inv_cond(m+I): " << inv_cond(m+Matrix4f::Identity()) << endl; + Eigen::MatrixXf m = Eigen::MatrixXf::Random(4, 4); + std::cout << "matrix m:\n" << m << "\n\n"; + std::cout << "inv_cond(m): " << inv_cond(m) << "\n"; + std::cout << "inv_cond(m(1:3,1:3)): " << inv_cond(m.topLeftCorner(3,3)) << "\n"; + std::cout << "inv_cond(m+I): " << inv_cond(m+Eigen::MatrixXf::Identity(4, 4)) << "\n"; } diff --git a/libs/eigen/doc/examples/make_circulant.cpp.evaluator b/libs/eigen/doc/examples/make_circulant.cpp.evaluator index 
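function_taking_ref.cpp above relies on Eigen::Ref binding cheaply to many argument kinds; a hypothetical helper (first_coeff is not part of the patch) illustrating the same idiom:

```cpp
#include <Eigen/Dense>
#include <iostream>

// Ref<const MatrixXf> binds to plain matrices and their blocks without
// copying; a general expression like m + I is first evaluated into a
// hidden temporary, which is why inv_cond above can accept all three.
float first_coeff(const Eigen::Ref<const Eigen::MatrixXf>& a) {
  return a(0, 0);
}

int main() {
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(4, 4);
  std::cout << first_coeff(m) << "\n";
  std::cout << first_coeff(m.topLeftCorner(3, 3)) << "\n";
  std::cout << first_coeff(m + Eigen::MatrixXf::Identity(4, 4)) << "\n";
}
```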
2ba79e7..cd461b9 100644 --- a/libs/eigen/doc/examples/make_circulant.cpp.evaluator +++ b/libs/eigen/doc/examples/make_circulant.cpp.evaluator @@ -6,7 +6,7 @@ namespace Eigen { { typedef Circulant XprType; typedef typename nested_eval::type ArgTypeNested; - typedef typename remove_all::type ArgTypeNestedCleaned; + typedef remove_all_t ArgTypeNestedCleaned; typedef typename XprType::CoeffReturnType CoeffReturnType; enum { diff --git a/libs/eigen/doc/examples/make_circulant2.cpp b/libs/eigen/doc/examples/make_circulant2.cpp index 95d3dd3..d86a66b 100644 --- a/libs/eigen/doc/examples/make_circulant2.cpp +++ b/libs/eigen/doc/examples/make_circulant2.cpp @@ -1,8 +1,6 @@ #include #include -using namespace Eigen; - // [circulant_func] template class circulant_functor { @@ -10,8 +8,8 @@ class circulant_functor { public: circulant_functor(const ArgType& arg) : m_vec(arg) {} - const typename ArgType::Scalar& operator() (Index row, Index col) const { - Index index = row - col; + const typename ArgType::Scalar& operator() (Eigen::Index row, Eigen::Index col) const { + Eigen::Index index = row - col; if (index < 0) index += m_vec.size(); return m_vec(index); } @@ -21,10 +19,10 @@ public: // [square] template struct circulant_helper { - typedef Matrix MatrixType; }; @@ -32,7 +30,7 @@ struct circulant_helper { // [makeCirculant] template -CwiseNullaryOp, typename circulant_helper::MatrixType> +Eigen::CwiseNullaryOp, typename circulant_helper::MatrixType> makeCirculant(const Eigen::MatrixBase& arg) { typedef typename circulant_helper::MatrixType MatrixType; diff --git a/libs/eigen/doc/examples/nullary_indexing.cpp b/libs/eigen/doc/examples/nullary_indexing.cpp index b74db5f..38260af 100644 --- a/libs/eigen/doc/examples/nullary_indexing.cpp +++ b/libs/eigen/doc/examples/nullary_indexing.cpp @@ -1,8 +1,6 @@ #include #include -using namespace Eigen; - // [functor] template class indexing_functor { @@ -10,10 +8,10 @@ class indexing_functor { const RowIndexType &m_rowIndices; const ColIndexType &m_colIndices; public: - typedef Matrix MatrixType; @@ -21,7 +19,7 @@ public: : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices) {} - const typename ArgType::Scalar& operator() (Index row, Index col) const { + const typename ArgType::Scalar& operator() (Eigen::Index row, Eigen::Index col) const { return m_arg(m_rowIndices[row], m_colIndices[col]); } }; @@ -29,7 +27,7 @@ public: // [function] template -CwiseNullaryOp, typename indexing_functor::MatrixType> +Eigen::CwiseNullaryOp, typename indexing_functor::MatrixType> mat_indexing(const Eigen::MatrixBase& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) { typedef indexing_functor Func; @@ -43,8 +41,8 @@ int main() { std::cout << "[main1]\n"; Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4); - Array3i ri(1,2,1); - ArrayXi ci(6); ci << 3,2,1,0,0,2; + Eigen::Array3i ri(1,2,1); + Eigen::ArrayXi ci(6); ci << 3,2,1,0,0,2; Eigen::MatrixXi B = mat_indexing(A, ri, ci); std::cout << "A =" << std::endl; std::cout << A << std::endl << std::endl; @@ -56,11 +54,9 @@ int main() B = mat_indexing(A, ri+1, ci); std::cout << "A(ri+1,ci) =" << std::endl; std::cout << B << std::endl << std::endl; -#if EIGEN_COMP_CXXVER >= 11 - B = mat_indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)); + B = mat_indexing(A, Eigen::ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), Eigen::ArrayXi::LinSpaced(4,0,3)); std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), 
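make_circulant2.cpp and nullary_indexing.cpp above assemble CwiseNullaryOp expressions by hand; for simple cases the NullaryExpr helper with a lambda achieves the same effect. A hedged sketch of a circulant matrix built that way:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::VectorXd vec(4);
  vec << 0, 1, 2, 3;
  // Entry (i,j) is generated on the fly from the functor, just like the
  // hand-rolled circulant_functor above; each column is a cyclic shift.
  Eigen::MatrixXd circ = Eigen::MatrixXd::NullaryExpr(
      4, 4, [&vec](Eigen::Index i, Eigen::Index j) {
        Eigen::Index k = i - j;
        if (k < 0) k += vec.size();
        return vec(k);
      });
  std::cout << circ << "\n";
}
```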
ArrayXi::LinSpaced(4,0,3)) =" << std::endl; std::cout << B << std::endl << std::endl; -#endif std::cout << "[main2]\n"; } diff --git a/libs/eigen/doc/examples/tut_arithmetic_add_sub.cpp b/libs/eigen/doc/examples/tut_arithmetic_add_sub.cpp index e97477b..95162c0 100644 --- a/libs/eigen/doc/examples/tut_arithmetic_add_sub.cpp +++ b/libs/eigen/doc/examples/tut_arithmetic_add_sub.cpp @@ -1,14 +1,12 @@ #include #include -using namespace Eigen; - int main() { - Matrix2d a; + Eigen::Matrix2d a; a << 1, 2, 3, 4; - MatrixXd b(2,2); + Eigen::MatrixXd b(2,2); b << 2, 3, 1, 4; std::cout << "a + b =\n" << a + b << std::endl; @@ -16,7 +14,7 @@ int main() std::cout << "Doing a += b;" << std::endl; a += b; std::cout << "Now a =\n" << a << std::endl; - Vector3d v(1,2,3); - Vector3d w(1,0,0); + Eigen::Vector3d v(1,2,3); + Eigen::Vector3d w(1,0,0); std::cout << "-v + w - v =\n" << -v + w - v << std::endl; } diff --git a/libs/eigen/doc/examples/tut_arithmetic_dot_cross.cpp b/libs/eigen/doc/examples/tut_arithmetic_dot_cross.cpp index 631c9a5..d95e03c 100644 --- a/libs/eigen/doc/examples/tut_arithmetic_dot_cross.cpp +++ b/libs/eigen/doc/examples/tut_arithmetic_dot_cross.cpp @@ -1,15 +1,18 @@ #include #include -using namespace Eigen; -using namespace std; int main() { - Vector3d v(1,2,3); - Vector3d w(0,1,2); + Eigen::Vector3d v(1,2,3); + Eigen::Vector3d w(0,1,2); - cout << "Dot product: " << v.dot(w) << endl; + std::cout << "Dot product: " << v.dot(w) << std::endl; double dp = v.adjoint()*w; // automatic conversion of the inner product to a scalar - cout << "Dot product via a matrix product: " << dp << endl; - cout << "Cross product:\n" << v.cross(w) << endl; + std::cout << "Dot product via a matrix product: " << dp << std::endl; + + std::cout << "Cross product:\n" << v.cross(w) << std::endl; + Eigen::Vector2d v2(1,2); + Eigen::Vector2d w2(0,1); + double cp = v2.cross(w2); // returning a scalar between size-2 vectors + std::cout << "Cross product for 2D vectors: " << cp << std::endl; } diff --git a/libs/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp b/libs/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp index f213902..c2d5e2d 100644 --- a/libs/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp +++ b/libs/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp @@ -1,13 +1,12 @@ #include #include -using namespace Eigen; int main() { - Matrix2d mat; + Eigen::Matrix2d mat; mat << 1, 2, 3, 4; - Vector2d u(-1,1), v(2,0); + Eigen::Vector2d u(-1,1), v(2,0); std::cout << "Here is mat*mat:\n" << mat*mat << std::endl; std::cout << "Here is mat*u:\n" << mat*u << std::endl; std::cout << "Here is u^T*mat:\n" << u.transpose()*mat << std::endl; diff --git a/libs/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp b/libs/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp index d5f65b5..0ba8d6b 100644 --- a/libs/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp +++ b/libs/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp @@ -1,14 +1,12 @@ #include #include -using namespace Eigen; - int main() { - Matrix2d a; + Eigen::Matrix2d a; a << 1, 2, 3, 4; - Vector3d v(1,2,3); + Eigen::Vector3d v(1,2,3); std::cout << "a * 2.5 =\n" << a * 2.5 << std::endl; std::cout << "0.1 * v =\n" << 0.1 * v << std::endl; std::cout << "Doing v *= 2;" << std::endl; diff --git a/libs/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp b/libs/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp index c2da171..040087c 100644 --- a/libs/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp +++ 
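The dot/cross example above gains a two-dimensional cross product; with this patch's Eigen it returns a scalar rather than a vector, as this short sketch spells out:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Vector2d v2(1, 2), w2(0, 1);
  // For size-2 vectors, cross() yields the scalar z-component of the 3D
  // cross product of the zero-extended inputs: v2.x()*w2.y() - v2.y()*w2.x().
  double cp = v2.cross(w2);
  std::cout << cp << "\n";  // 1*1 - 2*0 = 1
}
```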
b/libs/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp @@ -1,17 +1,15 @@ #include #include -using namespace Eigen; - int main() { - MatrixXd m(2,2); + Eigen::MatrixXd m(2,2); m(0,0) = 3; m(1,0) = 2.5; m(0,1) = -1; m(1,1) = m(1,0) + m(0,1); std::cout << "Here is the matrix m:\n" << m << std::endl; - VectorXd v(2); + Eigen::VectorXd v(2); v(0) = 4; v(1) = v(0) - 1; std::cout << "Here is the vector v:\n" << v << std::endl; diff --git a/libs/eigen/doc/examples/tut_matrix_resize.cpp b/libs/eigen/doc/examples/tut_matrix_resize.cpp index 0392c3a..aa80cf5 100644 --- a/libs/eigen/doc/examples/tut_matrix_resize.cpp +++ b/libs/eigen/doc/examples/tut_matrix_resize.cpp @@ -1,16 +1,14 @@ #include #include -using namespace Eigen; - int main() { - MatrixXd m(2,5); + Eigen::MatrixXd m(2,5); m.resize(4,3); std::cout << "The matrix m is of size " << m.rows() << "x" << m.cols() << std::endl; std::cout << "It has " << m.size() << " coefficients" << std::endl; - VectorXd v(2); + Eigen::VectorXd v(2); v.resize(5); std::cout << "The vector v is of size " << v.size() << std::endl; std::cout << "As a matrix, v is of size " diff --git a/libs/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp b/libs/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp index dcbdfa7..3df87d2 100644 --- a/libs/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp +++ b/libs/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp @@ -1,11 +1,9 @@ #include #include -using namespace Eigen; - int main() { - Matrix4d m; + Eigen::Matrix4d m; m.resize(4,4); // no operation std::cout << "The matrix m is of size " << m.rows() << "x" << m.cols() << std::endl; diff --git a/libs/eigen/doc/snippets/CMakeLists.txt b/libs/eigen/doc/snippets/CMakeLists.txt index 65f195a..868d669 100644 --- a/libs/eigen/doc/snippets/CMakeLists.txt +++ b/libs/eigen/doc/snippets/CMakeLists.txt @@ -6,31 +6,26 @@ foreach(snippet_src ${snippets_SRCS}) get_filename_component(snippet ${snippet_src} NAME_WE) set(compile_snippet_target compile_${snippet}) set(compile_snippet_src ${compile_snippet_target}.cpp) - if((NOT ${snippet_src} MATCHES "cxx11") OR EIGEN_COMPILER_SUPPORT_CPP11) - file(READ ${snippet_src} snippet_source_code) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/compile_snippet.cpp.in - ${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src}) - add_executable(${compile_snippet_target} - ${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src}) - if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) - target_link_libraries(${compile_snippet_target} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) - endif() - if(${snippet_src} MATCHES "cxx11") - set_target_properties(${compile_snippet_target} PROPERTIES COMPILE_FLAGS "-std=c++11") - endif() - if(${snippet_src} MATCHES "deprecated") - set_target_properties(${compile_snippet_target} PROPERTIES COMPILE_FLAGS "-DEIGEN_NO_DEPRECATED_WARNING") - endif() - add_custom_command( - TARGET ${compile_snippet_target} - POST_BUILD - COMMAND ${compile_snippet_target} - ARGS >${CMAKE_CURRENT_BINARY_DIR}/${snippet}.out - ) - add_dependencies(all_snippets ${compile_snippet_target}) - set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src} - PROPERTIES OBJECT_DEPENDS ${snippet_src}) - else() - message("skip snippet ${snippet_src} because compiler does not support C++11") + + file(READ ${snippet_src} snippet_source_code) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/compile_snippet.cpp.in + ${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src}) + add_executable(${compile_snippet_target} + ${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src}) + 
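A note on the resize example above: resize() on a dynamic matrix does not preserve coefficients when the size actually changes; conservativeResize() is the companion API when the overlapping values must survive, as in this sketch:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd m(2, 5);
  m.setConstant(1.0);
  // Keeps the overlapping top-left 2x3 block; newly exposed coefficients
  // are left uninitialized, unlike plain resize() which discards all.
  m.conservativeResize(4, 3);
  std::cout << "m is now " << m.rows() << "x" << m.cols() << "\n";
}
```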
if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(${compile_snippet_target} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) endif() + + if(${snippet_src} MATCHES "deprecated") + set_target_properties(${compile_snippet_target} PROPERTIES COMPILE_FLAGS "-DEIGEN_NO_DEPRECATED_WARNING") + endif() + add_custom_command( + TARGET ${compile_snippet_target} + POST_BUILD + COMMAND ${compile_snippet_target} + ARGS >${CMAKE_CURRENT_BINARY_DIR}/${snippet}.out + ) + add_dependencies(all_snippets ${compile_snippet_target}) + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src} + PROPERTIES OBJECT_DEPENDS ${snippet_src}) endforeach() diff --git a/libs/eigen/doc/snippets/Cwise_array_atan2_array.cpp b/libs/eigen/doc/snippets/Cwise_array_atan2_array.cpp new file mode 100644 index 0000000..ace075a --- /dev/null +++ b/libs/eigen/doc/snippets/Cwise_array_atan2_array.cpp @@ -0,0 +1,4 @@ +Array<double,1,3> x(8,-25,3), + y(1./3.,0.5,-2.); +cout << "atan2([" << x << "], [" << y << "]) = " << x.atan2(y) << endl; // using ArrayBase::atan2 +cout << "atan2([" << x << "], [" << y << "]) = " << atan2(x,y) << endl; // using Eigen::atan2 diff --git a/libs/eigen/doc/snippets/JacobiSVD_basic.cpp b/libs/eigen/doc/snippets/JacobiSVD_basic.cpp index ab24b9b..6c21baf 100644 --- a/libs/eigen/doc/snippets/JacobiSVD_basic.cpp +++ b/libs/eigen/doc/snippets/JacobiSVD_basic.cpp @@ -1,6 +1,6 @@ MatrixXf m = MatrixXf::Random(3,2); cout << "Here is the matrix m:" << endl << m << endl; -JacobiSVD<MatrixXf> svd(m, ComputeThinU | ComputeThinV); +JacobiSVD<MatrixXf, ComputeThinU | ComputeThinV> svd(m); cout << "Its singular values are:" << endl << svd.singularValues() << endl; cout << "Its left singular vectors are the columns of the thin U matrix:" << endl << svd.matrixU() << endl; cout << "Its right singular vectors are the columns of the thin V matrix:" << endl << svd.matrixV() << endl; diff --git a/libs/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp b/libs/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp index bbb821e..cc0c50e 100644 --- a/libs/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp +++ b/libs/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp @@ -3,7 +3,7 @@ MatrixXd A = X + X.transpose(); cout << "Here is a random symmetric matrix, A:" << endl << A << endl; X = MatrixXd::Random(5,5); MatrixXd B = X * X.transpose(); -cout << "and a random postive-definite matrix, B:" << endl << B << endl << endl; +cout << "and a random positive-definite matrix, B:" << endl << B << endl << endl; GeneralizedSelfAdjointEigenSolver<MatrixXd> es(A,B); cout << "The eigenvalues of the pencil (A,B) are:" << endl << es.eigenvalues() << endl; diff --git a/libs/eigen/doc/snippets/Slicing_arrayexpr.cpp b/libs/eigen/doc/snippets/Slicing_arrayexpr.cpp index 2df8180..6d09980 100644 --- a/libs/eigen/doc/snippets/Slicing_arrayexpr.cpp +++ b/libs/eigen/doc/snippets/Slicing_arrayexpr.cpp @@ -1,4 +1,4 @@ ArrayXi ind(5); ind<<4,2,5,5,3; MatrixXi A = MatrixXi::Random(4,6); cout << "Initial matrix A:\n" << A << "\n\n"; -cout << "A(all,ind-1):\n" << A(all,ind-1) << "\n\n"; +cout << "A(all,ind-1):\n" << A(Eigen::placeholders::all,ind-1) << "\n\n"; diff --git a/libs/eigen/doc/snippets/Slicing_rawarray_cxx11.cpp b/libs/eigen/doc/snippets/Slicing_rawarray_cxx11.cpp index 1087131..7a3e6e5 100644 --- a/libs/eigen/doc/snippets/Slicing_rawarray_cxx11.cpp +++ b/libs/eigen/doc/snippets/Slicing_rawarray_cxx11.cpp @@ -1,5 +1,3 @@ -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE MatrixXi A = 
MatrixXi::Random(4,6); cout << "Initial matrix A:\n" << A << "\n\n"; -cout << "A(all,{4,2,5,5,3}):\n" << A(all,{4,2,5,5,3}) << "\n\n"; -#endif +cout << "A(all,{4,2,5,5,3}):\n" << A(Eigen::placeholders::all,{4,2,5,5,3}) << "\n\n"; diff --git a/libs/eigen/doc/snippets/Slicing_stdvector_cxx11.cpp b/libs/eigen/doc/snippets/Slicing_stdvector_cxx11.cpp index 555f662..74f0727 100644 --- a/libs/eigen/doc/snippets/Slicing_stdvector_cxx11.cpp +++ b/libs/eigen/doc/snippets/Slicing_stdvector_cxx11.cpp @@ -1,4 +1,4 @@ std::vector ind{4,2,5,5,3}; MatrixXi A = MatrixXi::Random(4,6); cout << "Initial matrix A:\n" << A << "\n\n"; -cout << "A(all,ind):\n" << A(all,ind) << "\n\n"; +cout << "A(all,ind):\n" << A(Eigen::placeholders::all,ind) << "\n\n"; diff --git a/libs/eigen/doc/special_examples/CMakeLists.txt b/libs/eigen/doc/special_examples/CMakeLists.txt index 5b00e8b..e6407aa 100644 --- a/libs/eigen/doc/special_examples/CMakeLists.txt +++ b/libs/eigen/doc/special_examples/CMakeLists.txt @@ -19,16 +19,13 @@ if(QT4_FOUND) add_dependencies(all_examples Tutorial_sparse_example) endif() -if(EIGEN_COMPILER_SUPPORT_CPP11) - add_executable(random_cpp11 random_cpp11.cpp) - target_link_libraries(random_cpp11 ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) - add_dependencies(all_examples random_cpp11) - ei_add_target_property(random_cpp11 COMPILE_FLAGS "-std=c++11") +add_executable(random_cpp11 random_cpp11.cpp) +target_link_libraries(random_cpp11 ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) +add_dependencies(all_examples random_cpp11) - add_custom_command( - TARGET random_cpp11 - POST_BUILD - COMMAND random_cpp11 - ARGS >${CMAKE_CURRENT_BINARY_DIR}/random_cpp11.out - ) -endif() +add_custom_command( + TARGET random_cpp11 + POST_BUILD + COMMAND random_cpp11 + ARGS >${CMAKE_CURRENT_BINARY_DIR}/random_cpp11.out +) diff --git a/libs/eigen/doc/special_examples/random_cpp11.cpp b/libs/eigen/doc/special_examples/random_cpp11.cpp index 33744c0..bd73800 100644 --- a/libs/eigen/doc/special_examples/random_cpp11.cpp +++ b/libs/eigen/doc/special_examples/random_cpp11.cpp @@ -2,13 +2,11 @@ #include #include -using namespace Eigen; - int main() { std::default_random_engine generator; std::poisson_distribution distribution(4.1); auto poisson = [&] () {return distribution(generator);}; - RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson ); + Eigen::RowVectorXi v = Eigen::RowVectorXi::NullaryExpr(10, poisson ); std::cout << v << "\n"; } diff --git a/libs/eigen/failtest/CMakeLists.txt b/libs/eigen/failtest/CMakeLists.txt index 256e541..2c5fc33 100644 --- a/libs/eigen/failtest/CMakeLists.txt +++ b/libs/eigen/failtest/CMakeLists.txt @@ -62,9 +62,5 @@ ei_add_failtest("jacobisvd_int") ei_add_failtest("bdcsvd_int") ei_add_failtest("eigensolver_int") ei_add_failtest("eigensolver_cplx") - -if(EIGEN_TEST_CXX11) - ei_add_failtest("initializer_list_1") - ei_add_failtest("initializer_list_2") -endif() - +ei_add_failtest("initializer_list_1") +ei_add_failtest("initializer_list_2") diff --git a/libs/eigen/lapack/CMakeLists.txt b/libs/eigen/lapack/CMakeLists.txt index e48497f..8d6d754 100644 --- a/libs/eigen/lapack/CMakeLists.txt +++ b/libs/eigen/lapack/CMakeLists.txt @@ -1,10 +1,18 @@ - project(EigenLapack CXX) +if(EIGEN_BUILD_LAPACK AND EIGEN_BUILD_BLAS) + include(CheckLanguage) check_language(Fortran) if(CMAKE_Fortran_COMPILER) enable_language(Fortran) + if("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "GNU") + if ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER_EQUAL 10.0) + # We use an old version of LAPACK with argument type mismatches. 
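With using namespace Eigen gone from the snippets, the slicing placeholder above is spelled out as Eigen::placeholders::all. A self-contained version of the std::vector case:

```cpp
#include <Eigen/Dense>
#include <iostream>
#include <vector>

int main() {
  Eigen::MatrixXi A = Eigen::MatrixXi::Random(4, 6);
  std::vector<int> ind{4, 2, 5, 5, 3};
  // Select all rows and the listed columns (repeats are allowed);
  // Eigen::all is an equivalent shorter spelling in Eigen 3.4+.
  std::cout << A(Eigen::placeholders::all, ind) << "\n";
}
```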
+ # Allow them to compile anyway with newer GNU versions. + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fallow-argument-mismatch") + endif() + endif() set(EIGEN_Fortran_COMPILER_WORKS ON) else() set(EIGEN_Fortran_COMPILER_WORKS OFF) @@ -145,6 +153,7 @@ if(EXISTS ${eigen_full_path_to_testing_lapack}) string(REPLACE "." "_" input_name ${input}) set(testName "${target}_${input_name}") if(EXISTS "${TEST_INPUT}") + add_dependencies(buildtests ${target}) add_test(NAME LAPACK-${testName} COMMAND "${CMAKE_COMMAND}" -DTEST=$ @@ -450,3 +459,6 @@ if(EXISTS ${eigen_full_path_to_testing_lapack}) endif() +elseif(EIGEN_BUILD_LAPACK AND NOT EIGEN_BUILD_BLAS) + message(FATAL_ERROR "EIGEN_BUILD_LAPACK requires EIGEN_BUILD_BLAS") +endif() #EIGEN_BUILD_LAPACK diff --git a/libs/eigen/lapack/cholesky.cpp b/libs/eigen/lapack/cholesky.inc similarity index 100% rename from libs/eigen/lapack/cholesky.cpp rename to libs/eigen/lapack/cholesky.inc diff --git a/libs/eigen/lapack/complex_double.cpp b/libs/eigen/lapack/complex_double.cpp index c9c5752..492f743 100644 --- a/libs/eigen/lapack/complex_double.cpp +++ b/libs/eigen/lapack/complex_double.cpp @@ -13,6 +13,6 @@ #define REAL_SCALAR_SUFFIX d #define ISCOMPLEX 1 -#include "cholesky.cpp" -#include "lu.cpp" -#include "svd.cpp" +#include "cholesky.inc" +#include "lu.inc" +#include "svd.inc" diff --git a/libs/eigen/lapack/complex_single.cpp b/libs/eigen/lapack/complex_single.cpp index 6d11b26..cdf989e 100644 --- a/libs/eigen/lapack/complex_single.cpp +++ b/libs/eigen/lapack/complex_single.cpp @@ -13,6 +13,6 @@ #define REAL_SCALAR_SUFFIX s #define ISCOMPLEX 1 -#include "cholesky.cpp" -#include "lu.cpp" -#include "svd.cpp" +#include "cholesky.inc" +#include "lu.inc" +#include "svd.inc" diff --git a/libs/eigen/lapack/double.cpp b/libs/eigen/lapack/double.cpp index ea78bb6..afabce3 100644 --- a/libs/eigen/lapack/double.cpp +++ b/libs/eigen/lapack/double.cpp @@ -12,7 +12,7 @@ #define SCALAR_SUFFIX_UP "D" #define ISCOMPLEX 0 -#include "cholesky.cpp" -#include "lu.cpp" -#include "eigenvalues.cpp" -#include "svd.cpp" +#include "cholesky.inc" +#include "lu.inc" +#include "eigenvalues.inc" +#include "svd.inc" diff --git a/libs/eigen/lapack/eigenvalues.cpp b/libs/eigen/lapack/eigenvalues.inc similarity index 100% rename from libs/eigen/lapack/eigenvalues.cpp rename to libs/eigen/lapack/eigenvalues.inc diff --git a/libs/eigen/lapack/lu.cpp b/libs/eigen/lapack/lu.inc similarity index 100% rename from libs/eigen/lapack/lu.cpp rename to libs/eigen/lapack/lu.inc diff --git a/libs/eigen/lapack/single.cpp b/libs/eigen/lapack/single.cpp index c7da3ef..2994436 100644 --- a/libs/eigen/lapack/single.cpp +++ b/libs/eigen/lapack/single.cpp @@ -12,7 +12,7 @@ #define SCALAR_SUFFIX_UP "S" #define ISCOMPLEX 0 -#include "cholesky.cpp" -#include "lu.cpp" -#include "eigenvalues.cpp" -#include "svd.cpp" +#include "cholesky.inc" +#include "lu.inc" +#include "eigenvalues.inc" +#include "svd.inc" diff --git a/libs/eigen/lapack/svd.cpp b/libs/eigen/lapack/svd.inc similarity index 99% rename from libs/eigen/lapack/svd.cpp rename to libs/eigen/lapack/svd.inc index 77b302b..83544cf 100644 --- a/libs/eigen/lapack/svd.cpp +++ b/libs/eigen/lapack/svd.inc @@ -135,4 +135,4 @@ EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int else if(*jobv=='O') matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); } return 0; -} +} \ No newline at end of file diff --git a/libs/eigen/test/AnnoyingScalar.h b/libs/eigen/test/AnnoyingScalar.h index 7ace083..4362de2 100644 --- 
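The cholesky/lu/eigenvalues/svd sources above are renamed to .inc because they are textually included once per scalar configuration rather than compiled as standalone translation units. A condensed single-file illustration of that pattern (twice_s/twice_d are made-up names, not LAPACK routines):

```cpp
#include <iostream>

// "single" configuration: compile the shared body for float.
#define SCALAR float
#define FUNC(name) name##_s
static SCALAR FUNC(twice)(SCALAR x) { return 2 * x; }
#undef FUNC
#undef SCALAR

// "double" configuration: compile the same body again for double.
#define SCALAR double
#define FUNC(name) name##_d
static SCALAR FUNC(twice)(SCALAR x) { return 2 * x; }
#undef FUNC
#undef SCALAR

int main() { std::cout << twice_s(1.5f) << " " << twice_d(1.5) << "\n"; }
```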
a/libs/eigen/test/AnnoyingScalar.h
+++ b/libs/eigen/test/AnnoyingScalar.h
@@ -32,14 +32,12 @@ class AnnoyingScalar
 {
   public:
     AnnoyingScalar()                { init(); *v = 0; }
-    AnnoyingScalar(long double _v)  { init(); *v = _v; }
-    AnnoyingScalar(double _v)       { init(); *v = _v; }
+    AnnoyingScalar(long double _v)  { init(); *v = static_cast<float>(_v); }
+    AnnoyingScalar(double _v)       { init(); *v = static_cast<float>(_v); }
     AnnoyingScalar(float _v)        { init(); *v = _v; }
-    AnnoyingScalar(int _v)          { init(); *v = _v; }
-    AnnoyingScalar(long _v)         { init(); *v = _v; }
-    #if EIGEN_HAS_CXX11
-    AnnoyingScalar(long long _v)    { init(); *v = _v; }
-    #endif
+    AnnoyingScalar(int _v)          { init(); *v = static_cast<float>(_v); }
+    AnnoyingScalar(long _v)         { init(); *v = static_cast<float>(_v); }
+    AnnoyingScalar(long long _v)    { init(); *v = static_cast<float>(_v); }
     AnnoyingScalar(const AnnoyingScalar& other) { init(); *v = *(other.v); }
     ~AnnoyingScalar() {
       if(v!=&data)
@@ -83,8 +81,8 @@ class AnnoyingScalar
     AnnoyingScalar& operator/=(const AnnoyingScalar& other) { *v /= *other.v; return *this; }
     AnnoyingScalar& operator= (const AnnoyingScalar& other) { *v = *other.v; return *this; }
-    bool operator==(const AnnoyingScalar& other) const { return *v == *other.v; }
-    bool operator!=(const AnnoyingScalar& other) const { return *v != *other.v; }
+    bool operator==(const AnnoyingScalar& other) const { return numext::equal_strict(*v, *other.v); }
+    bool operator!=(const AnnoyingScalar& other) const { return numext::not_equal_strict(*v, *other.v); }
     bool operator<=(const AnnoyingScalar& other) const { return *v <= *other.v; }
     bool operator< (const AnnoyingScalar& other) const { return *v <  *other.v; }
     bool operator>=(const AnnoyingScalar& other) const { return *v >= *other.v; }
diff --git a/libs/eigen/test/CMakeLists.txt b/libs/eigen/test/CMakeLists.txt
index 5136f82..223a9f1 100644
--- a/libs/eigen/test/CMakeLists.txt
+++ b/libs/eigen/test/CMakeLists.txt
@@ -42,45 +42,53 @@ endif()
 
 set(SPARSE_LIBS " ")
 
 find_package(CHOLMOD)
-if(CHOLMOD_FOUND)
+if(CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK)
   add_definitions("-DEIGEN_CHOLMOD_SUPPORT")
   include_directories(${CHOLMOD_INCLUDES})
   set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
   set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
   ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ")
+
+  ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}")
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ")
 endif()
 
 find_package(UMFPACK)
-if(UMFPACK_FOUND)
+if(UMFPACK_FOUND AND EIGEN_BUILD_BLAS)
   add_definitions("-DEIGEN_UMFPACK_SUPPORT")
   include_directories(${UMFPACK_INCLUDES})
   set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ")
+
+  ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}")
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ")
 endif()
 
 find_package(KLU)
-if(KLU_FOUND)
+if(KLU_FOUND AND EIGEN_BUILD_BLAS)
   add_definitions("-DEIGEN_KLU_SUPPORT")
   include_directories(${KLU_INCLUDES})
   set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ")
+
+  ei_add_test(klu_support "" "${KLU_ALL_LIBS}")
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ")
 endif()
 
 find_package(SuperLU 4.0)
-if(SuperLU_FOUND)
+if(SuperLU_FOUND AND EIGEN_BUILD_BLAS)
add_definitions("-DEIGEN_SUPERLU_SUPPORT") include_directories(${SUPERLU_INCLUDES}) set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ") + + ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ") endif() @@ -124,7 +132,7 @@ else() endif() find_package(SPQR) -if(SPQR_FOUND AND CHOLMOD_FOUND AND (EIGEN_Fortran_COMPILER_WORKS OR LAPACK_FOUND) ) +if(SPQR_FOUND AND CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK AND (EIGEN_Fortran_COMPILER_WORKS OR LAPACK_FOUND) ) add_definitions("-DEIGEN_SPQR_SUPPORT") include_directories(${SPQR_INCLUDES}) set(SPQR_ALL_LIBS ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) @@ -134,6 +142,17 @@ else() ei_add_property(EIGEN_MISSING_BACKENDS "SPQR, ") endif() +find_package(Accelerate) +if(Accelerate_FOUND) + add_definitions("-DEIGEN_ACCELERATE_SUPPORT") + include_directories(${Accelerate_INCLUDES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${Accelerate_LIBRARIES}) + set(Accelerate_ALL_LIBS ${Accelerate_LIBRARIES}) + ei_add_property(EIGEN_TESTED_BACKENDS "Accelerate, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "Accelerate, ") +endif() + option(EIGEN_TEST_NOQT "Disable Qt support in unit tests" OFF) if(NOT EIGEN_TEST_NOQT) find_package(Qt4) @@ -166,6 +185,7 @@ ei_add_test(io) ei_add_test(packetmath "-DEIGEN_FAST_MATH=1") ei_add_test(vectorization_logic) ei_add_test(basicstuff) +ei_add_test(constexpr) ei_add_test(constructor) ei_add_test(linearstructure) ei_add_test(integer_types) @@ -187,6 +207,7 @@ ei_add_test(product_small) ei_add_test(product_large) ei_add_test(product_extra) ei_add_test(diagonalmatrices) +ei_add_test(skew_symmetric_matrix3) ei_add_test(adjoint) ei_add_test(diagonal) ei_add_test(miscmatrices) @@ -194,6 +215,7 @@ ei_add_test(commainitializer) ei_add_test(smallvectors) ei_add_test(mapped_matrix) ei_add_test(mapstride) +ei_add_test(unaryviewstride) ei_add_test(mapstaticmethods) ei_add_test(array_cwise) ei_add_test(array_for_matrix) @@ -285,10 +307,11 @@ ei_add_test(array_of_string) ei_add_test(num_dimensions) ei_add_test(stl_iterators) ei_add_test(blasutil) -if(EIGEN_TEST_CXX11) - ei_add_test(initializer_list_construction) - ei_add_test(diagonal_matrix_variadic_ctor) -endif() +ei_add_test(random_matrix) +ei_add_test(initializer_list_construction) +ei_add_test(diagonal_matrix_variadic_ctor) +ei_add_test(serializer) +ei_add_test(tuple_test) add_executable(bug1213 bug1213.cpp bug1213_main.cpp) @@ -302,7 +325,7 @@ else() endif() endif() -ei_add_test(fastmath " ${EIGEN_FASTMATH_FLAGS} ") +ei_add_test(fastmath "${EIGEN_FASTMATH_FLAGS}") # # ei_add_test(denseLM) @@ -310,22 +333,6 @@ if(QT4_FOUND) ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}") endif() -if(UMFPACK_FOUND) - ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") -endif() - -if(KLU_FOUND OR SuiteSparse_FOUND) - ei_add_test(klu_support "" "${KLU_ALL_LIBS}") -endif() - -if(SUPERLU_FOUND) - ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}") -endif() - -if(CHOLMOD_FOUND) - ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}") -endif() - if(PARDISO_FOUND) ei_add_test(pardiso_support "" "${PARDISO_ALL_LIBS}") endif() @@ -334,7 +341,7 @@ if(PASTIX_FOUND AND (SCOTCH_FOUND OR METIS_FOUND)) ei_add_test(pastix_support "" "${PASTIX_ALL_LIBS}") endif() -if(SPQR_FOUND AND CHOLMOD_FOUND) +if(SPQR_FOUND AND CHOLMOD_FOUND AND EIGEN_BUILD_BLAS 
AND EIGEN_BUILD_LAPACK) ei_add_test(spqr_support "" "${SPQR_ALL_LIBS}") endif() @@ -342,6 +349,10 @@ if(METIS_FOUND) ei_add_test(metis_support "" "${METIS_LIBRARIES}") endif() +if(Accelerate_FOUND) + ei_add_test(accelerate_support "" "${Accelerate_ALL_LIBS}") +endif() + string(TOLOWER "${CMAKE_CXX_COMPILER}" cmake_cxx_compiler_tolower) if(cmake_cxx_compiler_tolower MATCHES "qcc") set(CXX_IS_QCC "ON") @@ -383,43 +394,51 @@ if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang") message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.") endif() -if(EIGEN_TEST_CUDA) +find_package(CUDA 9.0) +if(CUDA_FOUND AND EIGEN_TEST_CUDA) + # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor + # and -fno-check-new flags since they trigger thousands of compilation warnings + # in the CUDA runtime + string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -find_package(CUDA 5.0) -if(CUDA_FOUND) - - set(CUDA_PROPAGATE_HOST_FLAGS OFF) - - set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr") - if (${CUDA_VERSION} STREQUAL "7.0") - set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr") - endif() - - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) - endif() if(EIGEN_TEST_CUDA_CLANG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH) string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${GPU}") endforeach() + string(APPEND CMAKE_CXX_FLAGS " ${EIGEN_CUDA_CXX_FLAGS}") else() - foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH) - string(APPEND CUDA_NVCC_FLAGS " -gencode arch=compute_${GPU},code=sm_${GPU}") + set(CUDA_PROPAGATE_HOST_FLAGS OFF) + set(NVCC_ARCH_FLAGS) + # Define an -arch=sm_, otherwise if GPU does not exactly match one of + # those in the arch list for -gencode, the kernels will fail to run with + # cudaErrorNoKernelImageForDevice + # This can happen with newer cards (e.g. sm_75) and compiling with older + # versions of nvcc (e.g. 9.2) that do not support their specific arch. + list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE) + if(EIGEN_CUDA_COMPUTE_ARCH_SIZE) + list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT) + set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}") + endif() + foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH) + string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") endforeach() + set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}") + cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") endif() - string(APPEND CUDA_NVCC_FLAGS " ${EIGEN_CUDA_RELAXED_CONSTEXPR}") + set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") + ei_add_test(gpu_example) ei_add_test(gpu_basic) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() -endif() - # HIP unit tests option(EIGEN_TEST_HIP "Add HIP support." 
OFF)
@@ -442,6 +461,7 @@ if (EIGEN_TEST_HIP)
     set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
 
     ei_add_test(gpu_basic)
+    ei_add_test(gpu_example)
     unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
 
   elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia"))
diff --git a/libs/eigen/test/OffByOneScalar.h b/libs/eigen/test/OffByOneScalar.h
new file mode 100644
index 0000000..c0371a6
--- /dev/null
+++ b/libs/eigen/test/OffByOneScalar.h
@@ -0,0 +1,29 @@
+
+// A Scalar with internal representation T+1 so that zero is internally
+// represented by T(1). This is used to test memory fill.
+//
+template<typename T>
+class OffByOneScalar {
+ public:
+  OffByOneScalar() : val_(1) {}
+  OffByOneScalar(const OffByOneScalar& other) {
+    *this = other;
+  }
+  OffByOneScalar& operator=(const OffByOneScalar& other) {
+    val_ = other.val_;
+    return *this;
+  }
+
+  OffByOneScalar(T val) : val_(val + 1) {}
+  OffByOneScalar& operator=(T val) {
+    val_ = val + 1;
+    return *this;
+  }
+
+  operator T() const {
+    return val_ - 1;
+  }
+
+ private:
+  T val_;
+};
diff --git a/libs/eigen/test/accelerate_support.cpp b/libs/eigen/test/accelerate_support.cpp
new file mode 100644
index 0000000..ac4be61
--- /dev/null
+++ b/libs/eigen/test/accelerate_support.cpp
@@ -0,0 +1,176 @@
+#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS
+#include "sparse_solver.h"
+
+#if defined(DEBUG)
+#undef DEBUG
+#endif
+
+#include <Eigen/AccelerateSupport>
+
+template<typename MatrixType, typename DenseMat>
+int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows = 300, int maxCols = 300)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  int rows = internal::random<int>(1, maxRows);
+  int cols = internal::random<int>(1, maxCols);
+  double density = (std::max)(8.0 / (rows * cols), 0.01);
+
+  A.resize(rows,cols);
+  dA.resize(rows,cols);
+  initSparse<Scalar>(density, dA, A, ForceNonZeroDiag);
+  A.makeCompressed();
+  return rows;
+}
+
+template<typename MatrixType, typename DenseMat>
+int generate_sparse_square_symmetric_problem(MatrixType& A, DenseMat& dA, int maxSize = 300)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  int rows = internal::random<int>(1, maxSize);
+  int cols = rows;
+  double density = (std::max)(8.0 / (rows * cols), 0.01);
+
+  A.resize(rows,cols);
+  dA.resize(rows,cols);
+  initSparse<Scalar>(density, dA, A, ForceNonZeroDiag);
+  dA = dA * dA.transpose();
+  A = A * A.transpose();
+  A.makeCompressed();
+  return rows;
+}
+
+template<typename Solver, typename Scalar> void test_accelerate_ldlt()
+{
+  typedef SparseMatrix<Scalar> MatrixType;
+  typedef Matrix<Scalar, Dynamic, 1> DenseVector;
+
+  MatrixType A;
+  Matrix<Scalar, Dynamic, Dynamic> dA;
+
+  generate_sparse_square_symmetric_problem(A, dA);
+
+  DenseVector b = DenseVector::Random(A.rows());
+
+  Solver solver;
+  solver.compute(A);
+
+  if (solver.info() != Success)
+  {
+    std::cerr << "sparse LDLT factorization failed\n";
+    exit(0);
+    return;
+  }
+
+  DenseVector x = solver.solve(b);
+
+  if (solver.info() != Success)
+  {
+    std::cerr << "sparse LDLT factorization failed\n";
+    exit(0);
+    return;
+  }
+
+  //Compare with a dense solver
+  DenseVector refX = dA.ldlt().solve(b);
+  VERIFY((A * x).isApprox(A * refX, test_precision<Scalar>()));
+}
+
+template<typename Solver, typename Scalar> void test_accelerate_llt()
+{
+  typedef SparseMatrix<Scalar> MatrixType;
+  typedef Matrix<Scalar, Dynamic, 1> DenseVector;
+
+  MatrixType A;
+  Matrix<Scalar, Dynamic, Dynamic> dA;
+
+  generate_sparse_square_symmetric_problem(A, dA);
+
+  DenseVector b = DenseVector::Random(A.rows());
+
+  Solver solver;
+  solver.compute(A);
+
+  if (solver.info() != Success)
+  {
+    std::cerr << "sparse LLT factorization failed\n";
+    exit(0);
+    return;
+  }
+
+  DenseVector x = solver.solve(b);
+
+  if (solver.info() != Success)
+  {
+    std::cerr << "sparse LLT factorization failed\n";
+    exit(0);
+    return;
+  }
+
+  //Compare with a dense solver
+
DenseVector refX = dA.llt().solve(b); + VERIFY((A * x).isApprox(A * refX, test_precision())); +} + +template void test_accelerate_qr() +{ + typedef SparseMatrix MatrixType; + typedef Matrix DenseVector; + + MatrixType A; + Matrix dA; + + generate_sparse_rectangular_problem(A, dA); + + DenseVector b = DenseVector::Random(A.rows()); + + Solver solver; + solver.compute(A); + + if (solver.info() != Success) + { + std::cerr << "sparse QR factorization failed\n"; + exit(0); + return; + } + + DenseVector x = solver.solve(b); + + if (solver.info() != Success) + { + std::cerr << "sparse QR factorization failed\n"; + exit(0); + return; + } + + //Compare with a dense solver + DenseVector refX = dA.colPivHouseholderQr().solve(b); + VERIFY((A * x).isApprox(A * refX, test_precision())); +} + +template +void run_tests() +{ + typedef SparseMatrix MatrixType; + + test_accelerate_ldlt >(); + test_accelerate_ldlt >(); + test_accelerate_ldlt >(); + test_accelerate_ldlt >(); + + test_accelerate_ldlt >(); + test_accelerate_ldlt >(); + test_accelerate_ldlt >(); + test_accelerate_ldlt >(); + + test_accelerate_llt >(); + + test_accelerate_llt >(); + + test_accelerate_qr >(); +} + +EIGEN_DECLARE_TEST(accelerate_support) +{ + CALL_SUBTEST_1(run_tests()); + CALL_SUBTEST_2(run_tests()); +} diff --git a/libs/eigen/test/adjoint.cpp b/libs/eigen/test/adjoint.cpp index 4c4f98b..37d23b1 100644 --- a/libs/eigen/test/adjoint.cpp +++ b/libs/eigen/test/adjoint.cpp @@ -7,8 +7,6 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#define EIGEN_NO_STATIC_ASSERT - #include "main.h" template struct adjoint_specific; @@ -47,7 +45,7 @@ template<> struct adjoint_specific { VERIFY_IS_APPROX((v1*0).normalized(), (v1*0)); #if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE) RealScalar very_small = (std::numeric_limits::min)(); - VERIFY( (v1*very_small).norm() == 0 ); + VERIFY( numext::is_exactly_zero((v1*very_small).norm()) ); VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small)); v3 = v1*very_small; v3.normalize(); @@ -64,6 +62,17 @@ template<> struct adjoint_specific { } }; +template +MatrixType RandomMatrix(Index rows, Index cols, Scalar min, Scalar max) { + MatrixType M = MatrixType(rows, cols); + for (Index i=0; i(min, max); + } + } + return M; +} + template void adjoint(const MatrixType& m) { /* this test covers the following files: @@ -79,17 +88,21 @@ template void adjoint(const MatrixType& m) Index rows = m.rows(); Index cols = m.cols(); - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), + // Avoid integer overflow by limiting input values. + RealScalar rmin = static_cast(NumTraits::IsInteger ? NumTraits::IsSigned ? -100 : 0 : -1); + RealScalar rmax = static_cast(NumTraits::IsInteger ? 
100 : 1); + + MatrixType m1 = RandomMatrix(rows, cols, rmin, rmax), + m2 = RandomMatrix(rows, cols, rmin, rmax), m3(rows, cols), - square = SquareMatrixType::Random(rows, rows); - VectorType v1 = VectorType::Random(rows), - v2 = VectorType::Random(rows), - v3 = VectorType::Random(rows), + square = RandomMatrix(rows, rows, rmin, rmax); + VectorType v1 = RandomMatrix(rows, 1, rmin, rmax), + v2 = RandomMatrix(rows, 1, rmin, rmax), + v3 = RandomMatrix(rows, 1, rmin, rmax), vzero = VectorType::Zero(rows); - Scalar s1 = internal::random(), - s2 = internal::random(); + Scalar s1 = internal::random(rmin, rmax), + s2 = internal::random(rmin, rmax); // check basic compatibility of adjoint, transpose, conjugate VERIFY_IS_APPROX(m1.transpose().conjugate().adjoint(), m1); @@ -140,7 +153,8 @@ template void adjoint(const MatrixType& m) // check mixed dot product typedef Matrix RealVectorType; - RealVectorType rv1 = RealVectorType::Random(rows); + RealVectorType rv1 = RandomMatrix(rows, 1, rmin, rmax); + VERIFY_IS_APPROX(v1.dot(rv1.template cast()), v1.dot(rv1)); VERIFY_IS_APPROX(rv1.template cast().dot(v1), rv1.dot(v1)); diff --git a/libs/eigen/test/array_cwise.cpp b/libs/eigen/test/array_cwise.cpp index 0cc438b..af8a1ef 100644 --- a/libs/eigen/test/array_cwise.cpp +++ b/libs/eigen/test/array_cwise.cpp @@ -7,12 +7,22 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#include #include "main.h" +template ::IsInteger,int> = 0> +std::vector special_values() { + const Scalar zero = Scalar(0); + const Scalar one = Scalar(1); + const Scalar two = Scalar(2); + const Scalar three = Scalar(3); + const Scalar min = (std::numeric_limits::min)(); + const Scalar max = (std::numeric_limits::max)(); + return { zero, min, one, two, three, max }; +} -// Test the corner cases of pow(x, y) for real types. -template -void pow_test() { +template ::IsInteger, int> = 0> +std::vector special_values() { const Scalar zero = Scalar(0); const Scalar eps = Eigen::NumTraits::epsilon(); const Scalar one = Scalar(1); @@ -26,36 +36,29 @@ void pow_test() { const Scalar min = (std::numeric_limits::min)(); const Scalar max = (std::numeric_limits::max)(); const Scalar max_exp = (static_cast(int(Eigen::NumTraits::max_exponent())) * Scalar(EIGEN_LN2)) / eps; + return { zero, denorm_min, min, eps, sqrt_half, one, sqrt2, two, three, max_exp, max, inf, nan }; +} - const static Scalar abs_vals[] = {zero, - denorm_min, - min, - eps, - sqrt_half, - one, - sqrt2, - two, - three, - max_exp, - max, - inf, - nan}; - const int abs_cases = 13; - const int num_cases = 2*abs_cases * 2*abs_cases; - // Repeat the same value to make sure we hit the vectorized path. - const int num_repeats = 32; - Array x(num_repeats, num_cases); - Array y(num_repeats, num_cases); +template +void special_value_pairs(Array& x, + Array& y) { + std::vector abs_vals = special_values(); + const Index abs_cases = (Index)abs_vals.size(); + const Index num_cases = 2*abs_cases * 2*abs_cases; + // ensure both vectorized and non-vectorized paths taken + const Index num_repeats = 2 * (Index)internal::packet_traits::size + 1; + x.resize(num_repeats, num_cases); + y.resize(num_repeats, num_cases); int count = 0; - for (int i = 0; i < abs_cases; ++i) { + for (Index i = 0; i < abs_cases; ++i) { const Scalar abs_x = abs_vals[i]; - for (int sign_x = 0; sign_x < 2; ++sign_x) { + for (Index sign_x = 0; sign_x < 2; ++sign_x) { Scalar x_case = sign_x == 0 ? 
-abs_x : abs_x; - for (int j = 0; j < abs_cases; ++j) { + for (Index j = 0; j < abs_cases; ++j) { const Scalar abs_y = abs_vals[j]; - for (int sign_y = 0; sign_y < 2; ++sign_y) { + for (Index sign_y = 0; sign_y < 2; ++sign_y) { Scalar y_case = sign_y == 0 ? -abs_y : abs_y; - for (int repeat = 0; repeat < num_repeats; ++repeat) { + for (Index repeat = 0; repeat < num_repeats; ++repeat) { x(repeat, count) = x_case; y(repeat, count) = y_case; } @@ -64,24 +67,266 @@ void pow_test() { } } } +} - Array actual = x.pow(y); +template +void binary_op_test(std::string name, Fn fun, RefFn ref) { const Scalar tol = test_precision(); + Array x; + Array y; + special_value_pairs(x, y); + + Array actual = fun(x, y); bool all_pass = true; - for (int i = 0; i < 1; ++i) { - for (int j = 0; j < num_cases; ++j) { - Scalar e = static_cast(std::pow(x(i,j), y(i,j))); + for (Index i = 0; i < x.rows(); ++i) { + for (Index j = 0; j < x.cols(); ++j) { + Scalar e = static_cast(ref(x(i,j), y(i,j))); Scalar a = actual(i, j); - bool fail = !(a==e) && !internal::isApprox(a, e, tol) && !((numext::isnan)(a) && (numext::isnan)(e)); - all_pass &= !fail; - if (fail) { - std::cout << "pow(" << x(i,j) << "," << y(i,j) << ") = " << a << " != " << e << std::endl; + bool success = (a==e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e)); + all_pass &= success; + if (!success) { + std::cout << name << "(" << x(i,j) << "," << y(i,j) << ") = " << a << " != " << e << std::endl; } } } VERIFY(all_pass); } +template +void binary_ops_test() { + binary_op_test("pow", + [](auto x, auto y) { return Eigen::pow(x, y); }, + [](auto x, auto y) { return std::pow(x, y); }); + binary_op_test("atan2", + [](auto x, auto y) { return Eigen::atan2(x, y); }, + [](auto x, auto y) { return std::atan2(x, y); }); +} + +template +void pow_scalar_exponent_test() { + using Int_t = typename internal::make_integer::type; + const Scalar tol = test_precision(); + + std::vector abs_vals = special_values(); + const Index num_vals = (Index)abs_vals.size(); + Map> bases(abs_vals.data(), num_vals); + + bool all_pass = true; + for (Scalar abs_exponent : abs_vals) { + for (Scalar exponent : {-abs_exponent, abs_exponent}) { + // test integer exponent code path + bool exponent_is_integer = (numext::isfinite)(exponent) && (numext::round(exponent) == exponent) && + (numext::abs(exponent) < static_cast(NumTraits::highest())); + if (exponent_is_integer) { + Int_t exponent_as_int = static_cast(exponent); + Array eigenPow = bases.pow(exponent_as_int); + for (Index j = 0; j < num_vals; j++) { + Scalar e = static_cast(std::pow(bases(j), exponent)); + Scalar a = eigenPow(j); + bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || + ((numext::isnan)(a) && (numext::isnan)(e)); + all_pass &= success; + if (!success) { + std::cout << "pow(" << bases(j) << "," << exponent << ") = " << a << " != " << e << std::endl; + } + } + } else { + // test floating point exponent code path + Array eigenPow = bases.pow(exponent); + for (Index j = 0; j < num_vals; j++) { + Scalar e = static_cast(std::pow(bases(j), exponent)); + Scalar a = eigenPow(j); + bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || + ((numext::isnan)(a) && (numext::isnan)(e)); + all_pass &= success; + if (!success) { + std::cout << "pow(" << bases(j) << "," << exponent << ") = " << a << " != " << e << std::endl; + } + } + } + } + } + VERIFY(all_pass); +} + +template +Scalar calc_overflow_threshold(const 
ScalarExponent exponent) { + EIGEN_USING_STD(exp2); + EIGEN_USING_STD(log2); + EIGEN_STATIC_ASSERT((NumTraits::digits() < 2 * NumTraits::digits()), BASE_TYPE_IS_TOO_BIG); + + if (exponent < 2) + return NumTraits::highest(); + else { + // base^e <= highest ==> base <= 2^(log2(highest)/e) + // For floating-point types, consider the bound for integer values that can be reproduced exactly = 2 ^ digits + double highest_bits = numext::mini(static_cast(NumTraits::digits()), + static_cast(log2(NumTraits::highest()))); + return static_cast( + numext::floor(exp2(highest_bits / static_cast(exponent)))); + } +} + +template ::IsInteger> +struct ref_pow { + static Base run(Base base, Exponent exponent) { + EIGEN_USING_STD(pow); + return pow(base, static_cast(exponent)); + } +}; + +template +struct ref_pow { + static Base run(Base base, Exponent exponent) { + EIGEN_USING_STD(pow); + return pow(base, exponent); + } +}; + +template +void test_exponent(Exponent exponent) { + const Base max_abs_bases = static_cast(10000); + // avoid integer overflow in Base type + Base threshold = calc_overflow_threshold(numext::abs(exponent)); + // avoid numbers that can't be verified with std::pow + double double_threshold = calc_overflow_threshold(numext::abs(exponent)); + // use the lesser of these two thresholds + Base testing_threshold = + static_cast(threshold) < double_threshold ? threshold : static_cast(double_threshold); + // test both vectorized and non-vectorized code paths + const Index array_size = 2 * internal::packet_traits::size + 1; + + Base max_base = numext::mini(testing_threshold, max_abs_bases); + Base min_base = NumTraits::IsSigned ? -max_base : Base(0); + + ArrayX x(array_size), y(array_size); + bool all_pass = true; + for (Base base = min_base; base <= max_base; base++) { + if (exponent < 0 && base == 0) continue; + x.setConstant(base); + y = x.pow(exponent); + for (Base a : y) { + Base e = ref_pow::run(base, exponent); + bool pass = (a == e); + if (!NumTraits::IsInteger) { + pass = pass || (((numext::isfinite)(e) && internal::isApprox(a, e)) || + ((numext::isnan)(a) && (numext::isnan)(e))); + } + all_pass &= pass; + if (!pass) { + std::cout << "pow(" << base << "," << exponent << ") = " << a << " != " << e << std::endl; + } + } + } + VERIFY(all_pass); +} + +template +void unary_pow_test() { + Exponent max_exponent = static_cast(NumTraits::digits()); + Exponent min_exponent = static_cast(NumTraits::IsSigned ? -max_exponent : 0); + + for (Exponent exponent = min_exponent; exponent < max_exponent; ++exponent) { + test_exponent(exponent); + } +} + +void mixed_pow_test() { + // The following cases will test promoting a smaller exponent type + // to a wider base type. + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + + // Although in the following cases the exponent cannot be represented exactly + // in the base type, we do not perform a conversion, but implement + // the operation using repeated squaring. + unary_pow_test(); + unary_pow_test(); + + // The following cases will test promoting a wider exponent type + // to a narrower base type. This should compile but generate a + // deprecation warning: + unary_pow_test(); +} + +void int_pow_test() { + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + + // Although in the following cases the exponent cannot be represented exactly + // in the base type, we do not perform a conversion, but implement the + // operation using repeated squaring. 
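// A standalone sketch (not patch content) of the repeated-squaring scheme
// the comment above refers to; Eigen's actual unary pow kernel additionally
// handles negative exponents, overflow thresholds, and vectorized packets.
#include <cstdint>
double pow_by_squaring(double base, uint64_t e) {
  double result = 1.0;
  while (e != 0) {
    if (e & 1) result *= base;  // fold in the lowest set bit of e
    base *= base;               // square once per bit position
    e >>= 1;
  }
  return result;
}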
+ unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); + unary_pow_test(); +} + +namespace Eigen { +namespace internal { +template +struct test_signbit_op { + Scalar constexpr operator()(const Scalar& a) const { return numext::signbit(a); } + template + inline Packet packetOp(const Packet& a) const { + return psignbit(a); + } +}; +template +struct functor_traits> { + enum { Cost = 1, PacketAccess = true }; //todo: define HasSignbit flag +}; +} // namespace internal +} // namespace Eigen + + +template +void signbit_test() { + const size_t size = 100 * internal::packet_traits::size; + ArrayX x(size), y(size); + x.setRandom(); + std::vector special_vals = special_values(); + for (size_t i = 0; i < special_vals.size(); i++) { + x(2 * i + 0) = special_vals[i]; + x(2 * i + 1) = -special_vals[i]; + } + y = x.unaryExpr(internal::test_signbit_op()); + + bool all_pass = true; + for (size_t i = 0; i < size; i++) { + const Scalar ref_val = numext::signbit(x(i)); + bool not_same = internal::predux_any(internal::bitwise_helper::bitwise_xor(ref_val, y(i))); + if (not_same) std::cout << "signbit(" << x(i) << ") != " << y(i) << "\n"; + all_pass = all_pass && !not_same; + } + + VERIFY(all_pass); +} +void signbit_tests() { + signbit_test(); + signbit_test(); + signbit_test(); + signbit_test(); + + signbit_test(); + signbit_test(); + signbit_test(); + signbit_test(); + + signbit_test(); + signbit_test(); + signbit_test(); + signbit_test(); +} + template void array(const ArrayType& m) { typedef typename ArrayType::Scalar Scalar; @@ -92,8 +337,20 @@ template void array(const ArrayType& m) Index rows = m.rows(); Index cols = m.cols(); - ArrayType m1 = ArrayType::Random(rows, cols), - m2 = ArrayType::Random(rows, cols), + ArrayType m1 = ArrayType::Random(rows, cols); + if (NumTraits::IsInteger && NumTraits::IsSigned + && !NumTraits::IsComplex) { + // Here we cap the size of the values in m1 such that pow(3)/cube() + // doesn't overflow and result in undefined behavior. 
Notice that because + // pow(int, int) promotes its inputs and output to double (according to + // the C++ standard), we have to make sure that the result fits in 53 bits + // for int64, + RealScalar max_val = + numext::mini(RealScalar(std::cbrt(NumTraits::highest())), + RealScalar(std::cbrt(1LL << 53)))/2; + m1.array() = (m1.abs().array() <= max_val).select(m1, Scalar(max_val)); + } + ArrayType m2 = ArrayType::Random(rows, cols), m3(rows, cols); ArrayType m4 = m1; // copy constructor VERIFY_IS_APPROX(m1, m4); @@ -119,23 +376,23 @@ template void array(const ArrayType& m) VERIFY_IS_APPROX(m3, m1 - s1); // scalar operators via Maps - m3 = m1; - ArrayType::Map(m1.data(), m1.rows(), m1.cols()) -= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); - VERIFY_IS_APPROX(m1, m3 - m2); + m3 = m1; m4 = m1; + ArrayType::Map(m4.data(), m4.rows(), m4.cols()) -= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m4, m3 - m2); - m3 = m1; - ArrayType::Map(m1.data(), m1.rows(), m1.cols()) += ArrayType::Map(m2.data(), m2.rows(), m2.cols()); - VERIFY_IS_APPROX(m1, m3 + m2); + m3 = m1; m4 = m1; + ArrayType::Map(m4.data(), m4.rows(), m4.cols()) += ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m4, m3 + m2); - m3 = m1; - ArrayType::Map(m1.data(), m1.rows(), m1.cols()) *= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); - VERIFY_IS_APPROX(m1, m3 * m2); + m3 = m1; m4 = m1; + ArrayType::Map(m4.data(), m4.rows(), m4.cols()) *= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m4, m3 * m2); - m3 = m1; + m3 = m1; m4 = m1; m2 = ArrayType::Random(rows,cols); m2 = (m2==0).select(1,m2); - ArrayType::Map(m1.data(), m1.rows(), m1.cols()) /= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); - VERIFY_IS_APPROX(m1, m3 / m2); + ArrayType::Map(m4.data(), m4.rows(), m4.cols()) /= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m4, m3 / m2); // reductions VERIFY_IS_APPROX(m1.abs().colwise().sum().sum(), m1.abs().sum()); @@ -176,7 +433,6 @@ template void array(const ArrayType& m) FixedArrayType f4(f1.data()); VERIFY_IS_APPROX(f4, f1); } - #if EIGEN_HAS_CXX11 { FixedArrayType f1{s1}; VERIFY_IS_APPROX(f1, FixedArrayType::Constant(s1)); @@ -188,7 +444,6 @@ template void array(const ArrayType& m) FixedArrayType f4{f1.data()}; VERIFY_IS_APPROX(f4, f1); } - #endif // pow VERIFY_IS_APPROX(m1.pow(2), m1.square()); @@ -214,14 +469,12 @@ template void array(const ArrayType& m) OneDArrayType o2(static_cast(rows)); VERIFY(o2.size()==rows); } - #if EIGEN_HAS_CXX11 { OneDArrayType o1{rows}; VERIFY(o1.size()==rows); OneDArrayType o4{int(rows)}; VERIFY(o4.size()==rows); } - #endif // Check possible conflicts with 2D ctor typedef Array TwoDArrayType; typedef Array ArrayType2; @@ -238,7 +491,6 @@ template void array(const ArrayType& m) ArrayType2 o4(static_cast(rows),static_cast(cols)); VERIFY(o4(0)==Scalar(rows) && o4(1)==Scalar(cols)); } - #if EIGEN_HAS_CXX11 { TwoDArrayType o1{rows,cols}; VERIFY(o1.rows()==rows); @@ -252,7 +504,6 @@ template void array(const ArrayType& m) ArrayType2 o4{int(rows),int(cols)}; VERIFY(o4(0)==Scalar(rows) && o4(1)==Scalar(cols)); } - #endif } template void comparisons(const ArrayType& m) @@ -360,11 +611,11 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); -#if EIGEN_HAS_CXX11_MATH + VERIFY_IS_APPROX(m1.atan2(m2), atan2(m1,m2)); + VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1))); VERIFY_IS_APPROX(m1.sinh().asinh(), 
asinh(sinh(m1))); VERIFY_IS_APPROX(m1.cosh().acosh(), acosh(cosh(m1))); -#endif VERIFY_IS_APPROX(m1.logistic(), logistic(m1)); VERIFY_IS_APPROX(m1.arg(), arg(m1)); @@ -421,6 +672,13 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); VERIFY_IS_APPROX( m1*m1.sign(),m1.abs()); VERIFY_IS_APPROX(m1.sign() * m1.abs(), m1); + + ArrayType tmp = m1.atan2(m2); + for (Index i = 0; i < tmp.size(); ++i) { + Scalar actual = tmp.array()(i); + Scalar expected = atan2(m1.array()(i), m2.array()(i)); + VERIFY_IS_APPROX(actual, expected); + } VERIFY_IS_APPROX(numext::abs2(numext::real(m1)) + numext::abs2(numext::imag(m1)), numext::abs2(m1)); VERIFY_IS_APPROX(numext::abs2(Eigen::real(m1)) + numext::abs2(Eigen::imag(m1)), numext::abs2(m1)); @@ -448,7 +706,10 @@ template void array_real(const ArrayType& m) // Avoid inf and NaN. m3 = (m1.square()::epsilon()).select(Scalar(1),m3); VERIFY_IS_APPROX(m3.pow(RealScalar(-2)), m3.square().inverse()); - pow_test(); + + // Test pow and atan2 on special IEEE values. + binary_ops_test(); + pow_scalar_exponent_test(); VERIFY_IS_APPROX(log10(m3), log(m3)/numext::log(Scalar(10))); VERIFY_IS_APPROX(log2(m3), log(m3)/numext::log(Scalar(2))); @@ -457,7 +718,7 @@ template void array_real(const ArrayType& m) const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); s1 += Scalar(tiny); m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); - VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); + VERIFY_IS_CWISE_APPROX(s1/m1, s1 * m1.inverse()); // check inplace transpose m3 = m1; @@ -467,6 +728,7 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m3, m1); } + template void array_complex(const ArrayType& m) { typedef typename ArrayType::Scalar Scalar; @@ -512,7 +774,6 @@ template void array_complex(const ArrayType& m) VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); VERIFY_IS_APPROX(m1.sign(), sign(m1)); - VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); VERIFY_IS_APPROX(m1.exp(), exp(m1)); VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp()); @@ -661,6 +922,35 @@ template void array_integer(const ArrayType& m) VERIFY( (m2 == m1.unaryExpr(arithmetic_shift_right<9>())).all() ); } +template +struct signed_shift_test_impl { + typedef typename ArrayType::Scalar Scalar; + static constexpr size_t Size = sizeof(Scalar); + static constexpr size_t MaxShift = (CHAR_BIT * Size) - 1; + + template + static inline std::enable_if_t<(N > MaxShift), void> run(const ArrayType& ) {} + template + static inline std::enable_if_t<(N <= MaxShift), void> run(const ArrayType& m) { + const Index rows = m.rows(); + const Index cols = m.cols(); + + ArrayType m1 = ArrayType::Random(rows, cols), m2(rows, cols); + + m2 = m1.unaryExpr([](const Scalar& x) { return x >> N; }); + VERIFY((m2 == m1.unaryExpr(internal::scalar_shift_right_op())).all()); + + m2 = m1.unaryExpr([](const Scalar& x) { return x << N; }); + VERIFY((m2 == m1.unaryExpr( internal::scalar_shift_left_op())).all()); + + run(m); + } +}; +template +void signed_shift_test(const ArrayType& m) { + signed_shift_test_impl::run(m); +} + EIGEN_DECLARE_TEST(array_cwise) { for(int i = 0; i < g_repeat; i++) { @@ -673,6 +963,9 @@ EIGEN_DECLARE_TEST(array_cwise) CALL_SUBTEST_6( array(Array(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_6( array_integer(ArrayXXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_6( array_integer(Array(internal::random(1,EIGEN_TEST_MAX_SIZE), 
internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( signed_shift_test(ArrayXXi(internal::random(1, EIGEN_TEST_MAX_SIZE), internal::random(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_7( signed_shift_test(Array(internal::random(1, EIGEN_TEST_MAX_SIZE), internal::random(1, EIGEN_TEST_MAX_SIZE)))); + } for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( comparisons(Array()) ); @@ -700,6 +993,12 @@ EIGEN_DECLARE_TEST(array_cwise) CALL_SUBTEST_4( array_complex(ArrayXXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_6( int_pow_test() ); + CALL_SUBTEST_7( mixed_pow_test() ); + CALL_SUBTEST_8( signbit_tests() ); + } + VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, int >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, float >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); diff --git a/libs/eigen/test/array_for_matrix.cpp b/libs/eigen/test/array_for_matrix.cpp index fb6be35..06e04a2 100644 --- a/libs/eigen/test/array_for_matrix.cpp +++ b/libs/eigen/test/array_for_matrix.cpp @@ -211,6 +211,40 @@ template void cwise_min_max(const MatrixType& m) VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, maxM1).array(), (m1.array().max)( maxM1)); VERIFY_IS_APPROX(m1.array(), (m1.array().max)( minM1)); + // Test NaN propagation for min/max. + if (!NumTraits::IsInteger) { + m1(0,0) = NumTraits::quiet_NaN(); + // Elementwise. + VERIFY((numext::isnan)(m1.template cwiseMax(MatrixType::Constant(rows,cols, Scalar(1)))(0,0))); + VERIFY((numext::isnan)(m1.template cwiseMin(MatrixType::Constant(rows,cols, Scalar(1)))(0,0))); + VERIFY(!(numext::isnan)(m1.template cwiseMax(MatrixType::Constant(rows,cols, Scalar(1)))(0,0))); + VERIFY(!(numext::isnan)(m1.template cwiseMin(MatrixType::Constant(rows,cols, Scalar(1)))(0,0))); + VERIFY((numext::isnan)(m1.template cwiseMax(Scalar(1))(0,0))); + VERIFY((numext::isnan)(m1.template cwiseMin(Scalar(1))(0,0))); + VERIFY(!(numext::isnan)(m1.template cwiseMax(Scalar(1))(0,0))); + VERIFY(!(numext::isnan)(m1.template cwiseMin(Scalar(1))(0,0))); + + + VERIFY((numext::isnan)(m1.array().template max(MatrixType::Constant(rows,cols, Scalar(1)).array())(0,0))); + VERIFY((numext::isnan)(m1.array().template min(MatrixType::Constant(rows,cols, Scalar(1)).array())(0,0))); + VERIFY(!(numext::isnan)(m1.array().template max(MatrixType::Constant(rows,cols, Scalar(1)).array())(0,0))); + VERIFY(!(numext::isnan)(m1.array().template min(MatrixType::Constant(rows,cols, Scalar(1)).array())(0,0))); + VERIFY((numext::isnan)(m1.array().template max(Scalar(1))(0,0))); + VERIFY((numext::isnan)(m1.array().template min(Scalar(1))(0,0))); + VERIFY(!(numext::isnan)(m1.array().template max(Scalar(1))(0,0))); + VERIFY(!(numext::isnan)(m1.array().template min(Scalar(1))(0,0))); + + // Reductions. 
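// A standalone usage sketch of the two NaN policies these checks exercise
// (assuming Eigen 3.4+, where min/max and minCoeff/maxCoeff accept a
// NaNPropagation template argument): PropagateNaN yields NaN whenever any
// operand is NaN, while PropagateNumbers prefers the non-NaN values.
#include <Eigen/Dense>
#include <limits>
void nan_policy_demo() {
  Eigen::ArrayXd a(2);
  a << std::numeric_limits<double>::quiet_NaN(), 2.0;
  double keep = a.maxCoeff<Eigen::PropagateNaN>();      // NaN
  double drop = a.maxCoeff<Eigen::PropagateNumbers>();  // 2.0
}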
+ VERIFY((numext::isnan)(m1.template maxCoeff())); + VERIFY((numext::isnan)(m1.template minCoeff())); + if (m1.size() > 1) { + VERIFY(!(numext::isnan)(m1.template maxCoeff())); + VERIFY(!(numext::isnan)(m1.template minCoeff())); + } else { + VERIFY((numext::isnan)(m1.template maxCoeff())); + VERIFY((numext::isnan)(m1.template minCoeff())); + } + } } template void resize(const MatrixTraits& t) diff --git a/libs/eigen/test/basicstuff.cpp b/libs/eigen/test/basicstuff.cpp index 4ca607c..47dfc04 100644 --- a/libs/eigen/test/basicstuff.cpp +++ b/libs/eigen/test/basicstuff.cpp @@ -7,11 +7,20 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#define EIGEN_NO_STATIC_ASSERT - #include "main.h" #include "random_without_cast_overflow.h" +template +std::enable_if_t<(MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1),void> +check_index(const MatrixType& m) { + VERIFY_RAISES_ASSERT(m[0]); + VERIFY_RAISES_ASSERT((m+m)[0]); +} + +template +std::enable_if_t +check_index(const MatrixType& /*unused*/) {} + template void basicStuff(const MatrixType& m) { typedef typename MatrixType::Scalar Scalar; @@ -60,10 +69,8 @@ template void basicStuff(const MatrixType& m) x = v1(static_cast(r1)); x = v1(static_cast(r1)); x = v1(static_cast(r1)); -#if EIGEN_HAS_CXX11 x = v1(static_cast(r1)); x = v1(static_cast(r1)); -#endif VERIFY_IS_APPROX( v1, v1); VERIFY_IS_NOT_APPROX( v1, 2*v1); @@ -101,8 +108,7 @@ template void basicStuff(const MatrixType& m) if(cols!=1 && rows!=1) { - VERIFY_RAISES_ASSERT(m1[0]); - VERIFY_RAISES_ASSERT((m1+m1)[0]); + check_index(m1); } VERIFY_IS_APPROX(m3 = m1,m1); @@ -223,10 +229,8 @@ struct casting_test_runner { casting_test::run(); casting_test::run(); casting_test::run(); -#if EIGEN_HAS_CXX11 casting_test::run(); casting_test::run(); -#endif casting_test::run(); casting_test::run(); casting_test::run(); @@ -237,7 +241,7 @@ struct casting_test_runner { }; template -struct casting_test_runner::IsComplex)>::type> +struct casting_test_runner::IsComplex)>> { static void run() { // Only a few casts from std::complex are defined. @@ -256,10 +260,8 @@ void casting_all() { casting_test_runner::run(); casting_test_runner::run(); casting_test_runner::run(); -#if EIGEN_HAS_CXX11 casting_test_runner::run(); casting_test_runner::run(); -#endif casting_test_runner::run(); casting_test_runner::run(); casting_test_runner::run(); diff --git a/libs/eigen/test/bdcsvd.cpp b/libs/eigen/test/bdcsvd.cpp index e92a7dc..539494b 100644 --- a/libs/eigen/test/bdcsvd.cpp +++ b/libs/eigen/test/bdcsvd.cpp @@ -10,35 +10,27 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/ +// We explicitly disable deprecated declarations for this set of tests +// because we purposely verify assertions for the deprecated SVD runtime +// option behavior. 
+#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) +#pragma warning( disable : 4996 ) +#endif + // discard stack allocation as that too bypasses malloc #define EIGEN_STACK_ALLOCATION_LIMIT 0 #define EIGEN_RUNTIME_NO_MALLOC #include "main.h" #include -#include -#include - #define SVD_DEFAULT(M) BDCSVD #define SVD_FOR_MIN_NORM(M) BDCSVD +#define SVD_STATIC_OPTIONS(M, O) BDCSVD #include "svd_common.h" -// Check all variants of JacobiSVD -template -void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true) -{ - MatrixType m; - if(pickrandom) { - m.resizeLike(a); - svd_fill_random(m); - } - else - m = a; - - CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); -} - template void bdcsvd_method() { @@ -49,70 +41,141 @@ void bdcsvd_method() VERIFY_IS_APPROX(m.bdcSvd().singularValues(), RealVecType::Ones()); VERIFY_RAISES_ASSERT(m.bdcSvd().matrixU()); VERIFY_RAISES_ASSERT(m.bdcSvd().matrixV()); + + // Deprecated behavior. VERIFY_IS_APPROX(m.bdcSvd(ComputeFullU|ComputeFullV).solve(m), m); VERIFY_IS_APPROX(m.bdcSvd(ComputeFullU|ComputeFullV).transpose().solve(m), m); VERIFY_IS_APPROX(m.bdcSvd(ComputeFullU|ComputeFullV).adjoint().solve(m), m); + VERIFY_IS_APPROX(m.template bdcSvd(ComputeFullU|ComputeFullV).solve(m), m); + VERIFY_IS_APPROX(m.template bdcSvd(ComputeFullU|ComputeFullV).transpose().solve(m), m); + VERIFY_IS_APPROX(m.template bdcSvd(ComputeFullU|ComputeFullV).adjoint().solve(m), m); + + VERIFY_IS_APPROX(m.template bdcSvd().solve(m), m); + VERIFY_IS_APPROX(m.template bdcSvd().transpose().solve(m), m); + VERIFY_IS_APPROX(m.template bdcSvd().adjoint().solve(m), m); + + VERIFY_IS_APPROX(m.template bdcSvd().solve(m), m); + VERIFY_IS_APPROX(m.template bdcSvd().transpose().solve(m), m); + VERIFY_IS_APPROX(m.template bdcSvd().adjoint().solve(m), m); } // compare the Singular values returned with Jacobi and Bdc -template -void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0) -{ - MatrixType m = MatrixType::Random(a.rows(), a.cols()); - BDCSVD bdc_svd(m); +template +void compare_bdc_jacobi(const MatrixType& a = MatrixType(), int algoswap = 16, bool random = true) { + MatrixType m = random ? MatrixType::Random(a.rows(), a.cols()) : a; + + BDCSVD bdc_svd(m.rows(), m.cols()); + bdc_svd.setSwitchSize(algoswap); + bdc_svd.compute(m); + JacobiSVD jacobi_svd(m); VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues()); - if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); - if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); - if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); - if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); +} + +// Verifies total deflation is **not** triggered. +void compare_bdc_jacobi_instance(bool structure_as_m, int algoswap = 16) +{ + MatrixXd m(4, 3); + if (structure_as_m) { + // The first 3 rows are the reduced form of Matrix 1 as shown below, and it + // has nonzero elements in the first column and diagonals only. + m << 1.056293, 0, 0, + -0.336468, 0.907359, 0, + -1.566245, 0, 0.149150, + -0.1, 0, 0; + } else { + // Matrix 1. 
+ m << 0.882336, 18.3914, -26.7921, + -5.58135, 17.1931, -24.0892, + -20.794, 8.68496, -4.83103, + -8.4981, -10.5451, 23.9072; + } + compare_bdc_jacobi(m, algoswap, false); +} + +template +void bdcsvd_all_options(const MatrixType& input = MatrixType()) { + MatrixType m(input.rows(), input.cols()); + svd_fill_random(m); + svd_option_checks(m); +} + +template +void bdcsvd_verify_assert(const MatrixType& input = MatrixType()) { + svd_verify_assert(input); + svd_verify_constructor_options_assert>(input); } EIGEN_DECLARE_TEST(bdcsvd) { - CALL_SUBTEST_3(( svd_verify_assert >(Matrix3f()) )); - CALL_SUBTEST_4(( svd_verify_assert >(Matrix4d()) )); - CALL_SUBTEST_7(( svd_verify_assert >(MatrixXf(10,12)) )); - CALL_SUBTEST_8(( svd_verify_assert >(MatrixXcd(7,5)) )); - - CALL_SUBTEST_101(( svd_all_trivial_2x2(bdcsvd) )); - CALL_SUBTEST_102(( svd_all_trivial_2x2(bdcsvd) )); + CALL_SUBTEST_1((bdcsvd_verify_assert())); + CALL_SUBTEST_1((bdcsvd_verify_assert())); + CALL_SUBTEST_2((bdcsvd_verify_assert>())); + CALL_SUBTEST_2((bdcsvd_verify_assert>())); + CALL_SUBTEST_3((bdcsvd_verify_assert, 6, 9>>())); - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_3(( bdcsvd() )); - CALL_SUBTEST_4(( bdcsvd() )); - CALL_SUBTEST_5(( bdcsvd >() )); + CALL_SUBTEST_4((svd_all_trivial_2x2(bdcsvd_all_options))); + CALL_SUBTEST_5((svd_all_trivial_2x2(bdcsvd_all_options))); + for (int i = 0; i < g_repeat; i++) { int r = internal::random(1, EIGEN_TEST_MAX_SIZE/2), c = internal::random(1, EIGEN_TEST_MAX_SIZE/2); - + TEST_SET_BUT_UNUSED_VARIABLE(r) TEST_SET_BUT_UNUSED_VARIABLE(c) - - CALL_SUBTEST_6(( bdcsvd(Matrix(r,2)) )); - CALL_SUBTEST_7(( bdcsvd(MatrixXf(r,c)) )); - CALL_SUBTEST_7(( compare_bdc_jacobi(MatrixXf(r,c)) )); - CALL_SUBTEST_10(( bdcsvd(MatrixXd(r,c)) )); - CALL_SUBTEST_10(( compare_bdc_jacobi(MatrixXd(r,c)) )); - CALL_SUBTEST_8(( bdcsvd(MatrixXcd(r,c)) )); - CALL_SUBTEST_8(( compare_bdc_jacobi(MatrixXcd(r,c)) )); + CALL_SUBTEST_6((compare_bdc_jacobi(MatrixXf(r, c)))); + CALL_SUBTEST_7((compare_bdc_jacobi(MatrixXd(r, c)))); + CALL_SUBTEST_8((compare_bdc_jacobi(MatrixXcd(r, c)))); // Test on inf/nan matrix - CALL_SUBTEST_7( (svd_inf_nan, MatrixXf>()) ); - CALL_SUBTEST_10( (svd_inf_nan, MatrixXd>()) ); + CALL_SUBTEST_9((svd_inf_nan())); + CALL_SUBTEST_10((svd_inf_nan())); + + // Verify some computations using all combinations of the Options template parameter. 
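// For reference, a minimal sketch of the compile-time Options API these new
// subtests cover (assuming the post-3.4 SVD interface, where computation
// options are template parameters instead of runtime constructor flags):
#include <Eigen/SVD>
Eigen::VectorXd least_squares(const Eigen::MatrixXd& A, const Eigen::VectorXd& b) {
  Eigen::BDCSVD<Eigen::MatrixXd, Eigen::ComputeThinU | Eigen::ComputeThinV> svd(A);
  return svd.solve(b);  // thin U and V suffice for solving
}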
+ CALL_SUBTEST_11((bdcsvd_all_options())); + CALL_SUBTEST_12((bdcsvd_all_options>())); + CALL_SUBTEST_13((bdcsvd_all_options(MatrixXd(20, 17)))); + CALL_SUBTEST_14((bdcsvd_all_options(MatrixXd(17, 20)))); + CALL_SUBTEST_15((bdcsvd_all_options>(Matrix(r, 15)))); + CALL_SUBTEST_16((bdcsvd_all_options>(Matrix(13, c)))); + CALL_SUBTEST_17((bdcsvd_all_options(MatrixXf(r, c)))); + CALL_SUBTEST_18((bdcsvd_all_options(MatrixXcd(r, c)))); + CALL_SUBTEST_19((bdcsvd_all_options(MatrixXd(r, c)))); + CALL_SUBTEST_20((bdcsvd_all_options>(Matrix(20, 27)))); + CALL_SUBTEST_21((bdcsvd_all_options>(Matrix(27, 20)))); + + CALL_SUBTEST_22(( + svd_check_max_size_matrix, ColPivHouseholderQRPreconditioner>( + r, c))); + CALL_SUBTEST_22( + (svd_check_max_size_matrix, HouseholderQRPreconditioner>(r, + c))); + CALL_SUBTEST_22(( + svd_check_max_size_matrix, ColPivHouseholderQRPreconditioner>( + r, c))); + CALL_SUBTEST_22( + (svd_check_max_size_matrix, HouseholderQRPreconditioner>(r, + c))); } // test matrixbase method - CALL_SUBTEST_1(( bdcsvd_method() )); - CALL_SUBTEST_3(( bdcsvd_method() )); + CALL_SUBTEST_23(( bdcsvd_method() )); + CALL_SUBTEST_23(( bdcsvd_method() )); // Test problem size constructors - CALL_SUBTEST_7( BDCSVD(10,10) ); + CALL_SUBTEST_24( BDCSVD(10,10) ); // Check that preallocation avoids subsequent mallocs // Disabled because not supported by BDCSVD // CALL_SUBTEST_9( svd_preallocate() ); - CALL_SUBTEST_2( svd_underoverflow() ); -} + CALL_SUBTEST_25( svd_underoverflow() ); + // Without total deflation issues. + CALL_SUBTEST_26(( compare_bdc_jacobi_instance(true) )); + CALL_SUBTEST_26(( compare_bdc_jacobi_instance(false) )); + + // With total deflation issues before, when it shouldn't be triggered. + CALL_SUBTEST_27(( compare_bdc_jacobi_instance(true, 3) )); + CALL_SUBTEST_27(( compare_bdc_jacobi_instance(false, 3) )); +} diff --git a/libs/eigen/test/bfloat16_float.cpp b/libs/eigen/test/bfloat16_float.cpp index c3de0b1..b2a22ce 100644 --- a/libs/eigen/test/bfloat16_float.cpp +++ b/libs/eigen/test/bfloat16_float.cpp @@ -209,8 +209,8 @@ void test_numtraits() void test_arithmetic() { - VERIFY_IS_EQUAL(static_cast(bfloat16(2) + bfloat16(2)), 4); - VERIFY_IS_EQUAL(static_cast(bfloat16(2) + bfloat16(-2)), 0); + VERIFY_IS_EQUAL(static_cast(bfloat16(2) + bfloat16(2)), 4.f); + VERIFY_IS_EQUAL(static_cast(bfloat16(2) + bfloat16(-2)), 0.f); VERIFY_IS_APPROX(static_cast(bfloat16(0.33333f) + bfloat16(0.66667f)), 1.0f); VERIFY_IS_EQUAL(static_cast(bfloat16(2.0f) * bfloat16(-5.5f)), -11.0f); VERIFY_IS_APPROX(static_cast(bfloat16(1.0f) / bfloat16(3.0f)), 0.3339f); diff --git a/libs/eigen/test/blasutil.cpp b/libs/eigen/test/blasutil.cpp index 845a498..ee98df4 100644 --- a/libs/eigen/test/blasutil.cpp +++ b/libs/eigen/test/blasutil.cpp @@ -196,12 +196,7 @@ EIGEN_DECLARE_TEST(blasutil) // TODO: Replace this by a call to numext::int64_t as soon as we have a way to // detect the typedef for int64_t on all platforms -#if EIGEN_HAS_CXX11 CALL_SUBTEST_4(run_test()); -#else - CALL_SUBTEST_4(run_test()); -#endif - CALL_SUBTEST_5(run_test()); CALL_SUBTEST_6(run_test()); CALL_SUBTEST_7(run_test >()); diff --git a/libs/eigen/test/block.cpp b/libs/eigen/test/block.cpp index 84124ab..f8583c3 100644 --- a/libs/eigen/test/block.cpp +++ b/libs/eigen/test/block.cpp @@ -7,11 +7,10 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#define EIGEN_NO_STATIC_ASSERT // otherwise we fail at compile time on unused paths #include "main.h" template -typename Eigen::internal::enable_if::IsComplex,typename MatrixType::Scalar>::type +std::enable_if_t::IsComplex,typename MatrixType::Scalar> block_real_only(const MatrixType &m1, Index r1, Index r2, Index c1, Index c2, const Scalar& s1) { // check cwise-Functions: VERIFY_IS_APPROX(m1.row(r1).cwiseMax(s1), m1.cwiseMax(s1).row(r1)); @@ -24,19 +23,33 @@ block_real_only(const MatrixType &m1, Index r1, Index r2, Index c1, Index c2, co } template -typename Eigen::internal::enable_if::IsComplex,typename MatrixType::Scalar>::type +std::enable_if_t::IsComplex,typename MatrixType::Scalar> block_real_only(const MatrixType &, Index, Index, Index, Index, const Scalar&) { return Scalar(0); } // Check at compile-time that T1==T2, and at runtime-time that a==b template -typename internal::enable_if::value,bool>::type +std::enable_if_t::value,bool> is_same_block(const T1& a, const T2& b) { return a.isApprox(b); } +template +std::enable_if_t<((MatrixType::Flags&RowMajorBit)==0),void> +check_left_top(const MatrixType& m, Index r, Index c, + Index rows, Index /*unused*/) { + VERIFY_IS_EQUAL(m.leftCols(c).coeff(r+c*rows), m(r,c)); +} + +template +std::enable_if_t<((MatrixType::Flags&RowMajorBit)!=0),void> +check_left_top(const MatrixType& m, Index r, Index c, + Index /*unused*/, Index cols) { + VERIFY_IS_EQUAL(m.topRows(r).coeff(c+r*cols), m(r,c)); +} + template void block(const MatrixType& m) { typedef typename MatrixType::Scalar Scalar; @@ -79,7 +92,8 @@ template void block(const MatrixType& m) VERIFY_IS_APPROX(m1.col(c1), m1_copy.col(c1) + s1 * m1_copy.col(c2)); m1.col(c1).col(0) += s1 * m1_copy.col(c2); VERIFY_IS_APPROX(m1.col(c1), m1_copy.col(c1) + Scalar(2) * s1 * m1_copy.col(c2)); - + + check_left_top(m1,r1,c1,rows,cols); //check block() Matrix b1(1,1); b1(0,0) = m1(r1,c1); @@ -135,19 +149,14 @@ template void block(const MatrixType& m) } // stress some basic stuffs with block matrices - VERIFY(numext::real(ones.col(c1).sum()) == RealScalar(rows)); - VERIFY(numext::real(ones.row(r1).sum()) == RealScalar(cols)); + VERIFY_IS_EQUAL(numext::real(ones.col(c1).sum()), RealScalar(rows)); + VERIFY_IS_EQUAL(numext::real(ones.row(r1).sum()), RealScalar(cols)); - VERIFY(numext::real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows)); - VERIFY(numext::real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols)); + VERIFY_IS_EQUAL(numext::real(ones.col(c1).dot(ones.col(c2))), RealScalar(rows)); + VERIFY_IS_EQUAL(numext::real(ones.row(r1).dot(ones.row(r2))), RealScalar(cols)); // check that linear acccessors works on blocks m1 = m1_copy; - if((MatrixType::Flags&RowMajorBit)==0) - VERIFY_IS_EQUAL(m1.leftCols(c1).coeff(r1+c1*rows), m1(r1,c1)); - else - VERIFY_IS_EQUAL(m1.topRows(r1).coeff(c1+r1*cols), m1(r1,c1)); - // now test some block-inside-of-block. 
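// A standalone sketch of the stride arithmetic that the reworked
// compare_using_data_and_stride helpers below verify: for a dense object
// with direct access, rowStride() and colStride() give the distance, in
// scalars, between adjacent rows and columns in memory.
#include <Eigen/Dense>
double coeff_via_strides(const Eigen::MatrixXd& m, Eigen::Index r, Eigen::Index c) {
  return m.data()[r * m.rowStride() + c * m.colStride()];
}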
@@ -213,14 +222,6 @@ template void block(const MatrixType& m) VERIFY_IS_EQUAL( ((m1*1).template block(1,0,0,1)), m1.block(1,0,0,1)); VERIFY_IS_EQUAL( ((m1*1).template block<1,Dynamic>(0,1,1,0)), m1.block(0,1,1,0)); - if (rows>=2 && cols>=2) - { - VERIFY_RAISES_ASSERT( m1 += m1.col(0) ); - VERIFY_RAISES_ASSERT( m1 -= m1.col(0) ); - VERIFY_RAISES_ASSERT( m1.array() *= m1.col(0).array() ); - VERIFY_RAISES_ASSERT( m1.array() /= m1.col(0).array() ); - } - VERIFY_IS_EQUAL( m1.template subVector(r1), m1.row(r1) ); VERIFY_IS_APPROX( (m1+m1).template subVector(r1), (m1+m1).row(r1) ); VERIFY_IS_EQUAL( m1.template subVector(c1), m1.col(c1) ); @@ -240,13 +241,35 @@ template void block(const MatrixType& m) } + template -void compare_using_data_and_stride(const MatrixType& m) +std::enable_if_t +compare_using_data_and_stride(const MatrixType& m) { Index rows = m.rows(); Index cols = m.cols(); Index size = m.size(); Index innerStride = m.innerStride(); + Index rowStride = m.rowStride(); + Index colStride = m.colStride(); + const typename MatrixType::Scalar* data = m.data(); + + for(int j=0;j +std::enable_if_t +compare_using_data_and_stride(const MatrixType& m) +{ + Index rows = m.rows(); + Index cols = m.cols(); + Index innerStride = m.innerStride(); Index outerStride = m.outerStride(); Index rowStride = m.rowStride(); Index colStride = m.colStride(); @@ -256,21 +279,11 @@ void compare_using_data_and_stride(const MatrixType& m) for(int i=0;i diff --git a/libs/eigen/test/boostmultiprec.cpp b/libs/eigen/test/boostmultiprec.cpp index 7c79ded..e2fc9a8 100644 --- a/libs/eigen/test/boostmultiprec.cpp +++ b/libs/eigen/test/boostmultiprec.cpp @@ -74,8 +74,7 @@ #include #include -namespace mp = boost::multiprecision; -typedef mp::number, mp::et_on> Real; +typedef boost::multiprecision::number, boost::multiprecision::et_on> Real; namespace Eigen { template<> struct NumTraits : GenericNumTraits { @@ -201,8 +200,8 @@ EIGEN_DECLARE_TEST(boostmultiprec) TEST_SET_BUT_UNUSED_VARIABLE(s) } - CALL_SUBTEST_9(( jacobisvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); - CALL_SUBTEST_10(( bdcsvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); + CALL_SUBTEST_9(( jacobisvd_all_options(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); + CALL_SUBTEST_10(( bdcsvd_all_options(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); CALL_SUBTEST_11(( test_simplicial_cholesky_T() )); } diff --git a/libs/eigen/test/constexpr.cpp b/libs/eigen/test/constexpr.cpp new file mode 100644 index 0000000..b8f0b09 --- /dev/null +++ b/libs/eigen/test/constexpr.cpp @@ -0,0 +1,52 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2022 Alex Richardson +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
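// For context, a minimal standalone illustration (C++20 only, matching the
// EIGEN_COMP_CXXVER guard used below) of the constexpr construction this
// new test verifies:
#include <Eigen/Dense>
constexpr Eigen::Matrix3i kMat({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
static_assert(kMat.coeff(1, 2) == 6, "fixed-size matrices are usable in constant expressions");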
+ +#include "main.h" + +EIGEN_DECLARE_TEST(constexpr) { + // Clang accepts (some of) this code when using C++14/C++17, but GCC does not like + // the fact that `T array[Size]` inside Eigen::internal::plain_array is not initialized + // until after the constructor returns: + // error: member ‘Eigen::internal::plain_array::array’ must be initialized by mem-initializer in + // ‘constexpr’ constructor +#if EIGEN_COMP_CXXVER >= 20 + constexpr Matrix3i mat({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + VERIFY_IS_EQUAL(mat.size(), 9); + VERIFY_IS_EQUAL(mat(0, 0), 1); + static_assert(mat.coeff(0,1) == 2); + constexpr Array33i arr({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + VERIFY_IS_EQUAL(arr(0, 0), 1); + VERIFY_IS_EQUAL(arr.size(), 9); + static_assert(arr.coeff(0,1) == 2); + // Also check dynamic size arrays/matrices with fixed-size storage (currently + // only works if all elements are initialized, since otherwise the compiler + // complains about uninitialized trailing elements. + constexpr Matrix dyn_mat({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + VERIFY_IS_EQUAL(dyn_mat.size(), 9); + VERIFY_IS_EQUAL(dyn_mat(0, 0), 1); + static_assert(dyn_mat.coeff(0,1) == 2); + constexpr Array dyn_arr({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + VERIFY_IS_EQUAL(dyn_arr(0, 0), 1); + VERIFY_IS_EQUAL(dyn_arr.size(), 9); + static_assert(dyn_arr.coeff(0,1) == 2); +#endif // EIGEN_COMP_CXXVER >= 20 +} + +// Check that we can use the std::initializer_list constructor for constexpr variables. +#if EIGEN_COMP_CXXVER >= 20 +// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT() will fail constexpr evaluation unless +// we have std::is_constant_evaluated(). +constexpr Matrix global_mat({{1, 2}, {3, 4}}); + +EIGEN_DECLARE_TEST(constexpr_global) { + VERIFY_IS_EQUAL(global_mat.size(), 4); + VERIFY_IS_EQUAL(global_mat(0, 0), 1); + static_assert(global_mat.coeff(0,0) == 1); +} +#endif // EIGEN_COMP_CXXVER >= 20 diff --git a/libs/eigen/test/dense_storage.cpp b/libs/eigen/test/dense_storage.cpp index 45c2bd7..ec78f01 100644 --- a/libs/eigen/test/dense_storage.cpp +++ b/libs/eigen/test/dense_storage.cpp @@ -13,7 +13,6 @@ #include -#if EIGEN_HAS_TYPE_TRAITS && EIGEN_HAS_CXX11 using DenseStorageD3x3 = Eigen::DenseStorage; static_assert(std::is_trivially_move_constructible::value, "DenseStorage not trivially_move_constructible"); static_assert(std::is_trivially_move_assignable::value, "DenseStorage not trivially_move_assignable"); @@ -22,7 +21,6 @@ static_assert(std::is_trivially_copy_constructible::value, "De static_assert(std::is_trivially_copy_assignable::value, "DenseStorage not trivially_copy_assignable"); static_assert(std::is_trivially_copyable::value, "DenseStorage not trivially_copyable"); #endif -#endif template void dense_storage_copy(int rows, int cols) @@ -90,8 +88,6 @@ void dense_storage_swap(int rows0, int cols0, int rows1, int cols1) template void dense_storage_alignment() { - #if EIGEN_HAS_ALIGNAS - struct alignas(Alignment) Empty1 {}; VERIFY_IS_EQUAL(std::alignment_of::value, Alignment); @@ -104,13 +100,12 @@ void dense_storage_alignment() VERIFY_IS_EQUAL( (std::alignment_of >::value), Alignment); const std::size_t default_alignment = internal::compute_default_alignment::value; - - VERIFY_IS_EQUAL( (std::alignment_of >::value), default_alignment); - VERIFY_IS_EQUAL( (std::alignment_of >::value), default_alignment); - struct Nested2 { Matrix mat; }; - VERIFY_IS_EQUAL(std::alignment_of::value, default_alignment); - - #endif + if (default_alignment > 0) { + VERIFY_IS_EQUAL( (std::alignment_of >::value), default_alignment); + VERIFY_IS_EQUAL( 
(std::alignment_of >::value), default_alignment); + struct Nested2 { Matrix mat; }; + VERIFY_IS_EQUAL(std::alignment_of::value, default_alignment); + } } template diff --git a/libs/eigen/test/diagonal_matrix_variadic_ctor.cpp b/libs/eigen/test/diagonal_matrix_variadic_ctor.cpp index fbc8f84..db56539 100644 --- a/libs/eigen/test/diagonal_matrix_variadic_ctor.cpp +++ b/libs/eigen/test/diagonal_matrix_variadic_ctor.cpp @@ -7,32 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#define EIGEN_NO_STATIC_ASSERT - #include "main.h" -template -void assertionTest() -{ - typedef DiagonalMatrix DiagMatrix5; - typedef DiagonalMatrix DiagMatrix7; - typedef DiagonalMatrix DiagMatrixX; - - Scalar raw[6]; - for (int i = 0; i < 6; ++i) { - raw[i] = internal::random(); - } - - VERIFY_RAISES_ASSERT((DiagMatrix5{raw[0], raw[1], raw[2], raw[3]})); - VERIFY_RAISES_ASSERT((DiagMatrix5{raw[0], raw[1], raw[3]})); - VERIFY_RAISES_ASSERT((DiagMatrix7{raw[0], raw[1], raw[2], raw[3]})); - - VERIFY_RAISES_ASSERT((DiagMatrixX { - {raw[0], raw[1], raw[2]}, - {raw[3], raw[4], raw[5]} - })); -} - #define VERIFY_IMPLICIT_CONVERSION_3(DIAGTYPE, V0, V1, V2) \ DIAGTYPE d(V0, V1, V2); \ DIAGTYPE::DenseMatrixType Dense = d.toDenseMatrix(); \ @@ -167,14 +143,6 @@ void constructorTest() EIGEN_DECLARE_TEST(diagonal_matrix_variadic_ctor) { - CALL_SUBTEST_1(assertionTest()); - CALL_SUBTEST_1(assertionTest()); - CALL_SUBTEST_1(assertionTest()); - CALL_SUBTEST_1(assertionTest()); - CALL_SUBTEST_1(assertionTest()); - CALL_SUBTEST_1(assertionTest()); - CALL_SUBTEST_1(assertionTest>()); - CALL_SUBTEST_2(constructorTest()); CALL_SUBTEST_2(constructorTest()); CALL_SUBTEST_2(constructorTest()); diff --git a/libs/eigen/test/diagonalmatrices.cpp b/libs/eigen/test/diagonalmatrices.cpp index 276bead..15492a7 100644 --- a/libs/eigen/test/diagonalmatrices.cpp +++ b/libs/eigen/test/diagonalmatrices.cpp @@ -7,6 +7,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
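+// (Illustrative sketch, not part of the original patch.) The two defines
+// below turn this file into an allocation test: defining
+// EIGEN_RUNTIME_NO_MALLOC before including Eigen exposes
+// internal::set_is_malloc_allowed(bool), and every internal heap allocation
+// asserts while it is set to false. The basic pattern, assuming a size n:
+// \code
+//   Eigen::VectorXd d = Eigen::VectorXd::Random(n);
+//   Eigen::MatrixXd m = Eigen::MatrixXd::Random(n, n), res(n, n);
+//   Eigen::internal::set_is_malloc_allowed(false);
+//   res.noalias() = d.asDiagonal() * m;  // asserts if the product allocates
+//   Eigen::internal::set_is_malloc_allowed(true);
+// \endcode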
+ +// discard stack allocation as that too bypasses malloc +#define EIGEN_STACK_ALLOCATION_LIMIT 0 +// heap allocation will raise an assert if enabled at runtime +#define EIGEN_RUNTIME_NO_MALLOC + #include "main.h" using namespace std; template void diagonalmatrices(const MatrixType& m) @@ -56,6 +62,7 @@ template void diagonalmatrices(const MatrixType& m) Index i = internal::random(0, rows-1); Index j = internal::random(0, cols-1); + internal::set_is_malloc_allowed(false); VERIFY_IS_APPROX( ((ldm1 * m1)(i,j)) , ldm1.diagonal()(i) * m1(i,j) ); VERIFY_IS_APPROX( ((ldm1 * (m1+m2))(i,j)) , ldm1.diagonal()(i) * (m1+m2)(i,j) ); VERIFY_IS_APPROX( ((m1 * rdm1)(i,j)) , rdm1.diagonal()(j) * m1(i,j) ); @@ -65,6 +72,10 @@ template void diagonalmatrices(const MatrixType& m) VERIFY_IS_APPROX( (((v1+v2).asDiagonal() * (m1+m2))(i,j)) , (v1+v2)(i) * (m1+m2)(i,j) ); VERIFY_IS_APPROX( ((m1 * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * m1(i,j) ); VERIFY_IS_APPROX( (((m1+m2) * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * (m1+m2)(i,j) ); + VERIFY_IS_APPROX( (ldm1 * ldm1).diagonal()(i), ldm1.diagonal()(i) * ldm1.diagonal()(i) ); + VERIFY_IS_APPROX( (ldm1 * ldm1 * m1)(i, j), ldm1.diagonal()(i) * ldm1.diagonal()(i) * m1(i, j) ); + VERIFY_IS_APPROX( ((v1.asDiagonal() * v1.asDiagonal()).diagonal()(i)), v1(i) * v1(i) ); + internal::set_is_malloc_allowed(true); if(rows>1) { @@ -84,7 +95,15 @@ template void diagonalmatrices(const MatrixType& m) big.block(i,j,rows,cols) = m1; big.block(i,j,rows,cols) = big.block(i,j,rows,cols) * rv1.asDiagonal(); VERIFY_IS_APPROX((big.block(i,j,rows,cols)) , m1 * rv1.asDiagonal() ); - + + // products do not allocate memory + MatrixType res(rows, cols); + internal::set_is_malloc_allowed(false); + res.noalias() = ldm1 * m1; + res.noalias() = m1 * rdm1; + res.noalias() = ldm1 * m1 * rdm1; + res.noalias() = LeftDiagonalMatrix::Identity(rows) * m1 * RightDiagonalMatrix::Zero(cols); + internal::set_is_malloc_allowed(true); // scalar multiple VERIFY_IS_APPROX(LeftDiagonalMatrix(ldm1*s1).diagonal(), ldm1.diagonal() * s1); @@ -112,6 +131,13 @@ template void diagonalmatrices(const MatrixType& m) VERIFY_IS_APPROX( sq_m3 = v1.asDiagonal() + v2.asDiagonal(), sq_m1 + sq_m2); VERIFY_IS_APPROX( sq_m3 = v1.asDiagonal() - v2.asDiagonal(), sq_m1 - sq_m2); VERIFY_IS_APPROX( sq_m3 = v1.asDiagonal() - 2*v2.asDiagonal() + v1.asDiagonal(), sq_m1 - 2*sq_m2 + sq_m1); + + // Zero and Identity + LeftDiagonalMatrix zero = LeftDiagonalMatrix::Zero(rows); + LeftDiagonalMatrix identity = LeftDiagonalMatrix::Identity(rows); + VERIFY_IS_APPROX(identity.diagonal().sum(), Scalar(rows)); + VERIFY_IS_APPROX(zero.diagonal().sum(), Scalar(0)); + VERIFY_IS_APPROX((zero + 2 * LeftDiagonalMatrix::Identity(rows)).diagonal().sum(), Scalar(2 * rows)); } template void as_scalar_product(const MatrixType& m) diff --git a/libs/eigen/test/dynalloc.cpp b/libs/eigen/test/dynalloc.cpp index 23c90a7..cdc10ee 100644 --- a/libs/eigen/test/dynalloc.cpp +++ b/libs/eigen/test/dynalloc.cpp @@ -20,9 +20,12 @@ typedef Matrix Vector8f; void check_handmade_aligned_malloc() { + // Hand-made alignment needs at least sizeof(void*) to store the offset. 
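+  // (Why at least sizeof(void*), an explanatory note not in the original
+  // patch: handmade_aligned_malloc over-allocates and stores the pointer
+  // returned by the underlying malloc immediately before the aligned address
+  // it hands out, so that handmade_aligned_free can recover and release the
+  // original allocation.)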
+ constexpr int alignment = (std::max)(EIGEN_DEFAULT_ALIGN_BYTES, sizeof(void*)); + for(int i = 1; i < 1000; i++) { - char *p = (char*)internal::handmade_aligned_malloc(i); + char *p = (char*)internal::handmade_aligned_malloc(i, alignment); VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; diff --git a/libs/eigen/test/eigensolver_generalized_real.cpp b/libs/eigen/test/eigensolver_generalized_real.cpp index 95ed431..a0c99b1 100644 --- a/libs/eigen/test/eigensolver_generalized_real.cpp +++ b/libs/eigen/test/eigensolver_generalized_real.cpp @@ -85,6 +85,42 @@ template void generalized_eigensolver_real(const MatrixType } } +template +void generalized_eigensolver_assert() { + GeneralizedEigenSolver eig; + // all raise assert if uninitialized + VERIFY_RAISES_ASSERT(eig.info()); + VERIFY_RAISES_ASSERT(eig.eigenvectors()); + VERIFY_RAISES_ASSERT(eig.eigenvalues()); + VERIFY_RAISES_ASSERT(eig.alphas()); + VERIFY_RAISES_ASSERT(eig.betas()); + + // none raise assert after compute called + eig.compute(MatrixType::Random(20, 20), MatrixType::Random(20, 20)); + VERIFY(eig.info() == Success); + eig.eigenvectors(); + eig.eigenvalues(); + eig.alphas(); + eig.betas(); + + // eigenvectors() raises assert, if eigenvectors were not requested + eig.compute(MatrixType::Random(20, 20), MatrixType::Random(20, 20), false); + VERIFY(eig.info() == Success); + VERIFY_RAISES_ASSERT(eig.eigenvectors()); + eig.eigenvalues(); + eig.alphas(); + eig.betas(); + + // all except info raise assert if realQZ did not converge + eig.setMaxIterations(0); // force real QZ to fail. + eig.compute(MatrixType::Random(20, 20), MatrixType::Random(20, 20)); + VERIFY(eig.info() == NoConvergence); + VERIFY_RAISES_ASSERT(eig.eigenvectors()); + VERIFY_RAISES_ASSERT(eig.eigenvalues()); + VERIFY_RAISES_ASSERT(eig.alphas()); + VERIFY_RAISES_ASSERT(eig.betas()); +} + EIGEN_DECLARE_TEST(eigensolver_generalized_real) { for(int i = 0; i < g_repeat; i++) { @@ -98,6 +134,7 @@ EIGEN_DECLARE_TEST(eigensolver_generalized_real) CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) ); CALL_SUBTEST_3( generalized_eigensolver_real(Matrix()) ); CALL_SUBTEST_4( generalized_eigensolver_real(Matrix2d()) ); + CALL_SUBTEST_5( generalized_eigensolver_assert() ); TEST_SET_BUT_UNUSED_VARIABLE(s) } } diff --git a/libs/eigen/test/evaluators.cpp b/libs/eigen/test/evaluators.cpp index 2810cd2..95bfb45 100644 --- a/libs/eigen/test/evaluators.cpp +++ b/libs/eigen/test/evaluators.cpp @@ -510,7 +510,9 @@ EIGEN_DECLARE_TEST(evaluators) const size_t K = 2; const size_t N = 5; float *destMem = new float[(M*N) + 1]; - float *dest = (internal::UIntPtr(destMem)%EIGEN_MAX_ALIGN_BYTES) == 0 ? destMem+1 : destMem; + // In case of no alignment, avoid division by zero. + constexpr int alignment = (std::max)(EIGEN_MAX_ALIGN_BYTES, 1); + float *dest = (internal::UIntPtr(destMem)%alignment) == 0 ? 
destMem+1 : destMem; const Matrix a = Matrix::Random(M, K); const Matrix b = Matrix::Random(K, N); diff --git a/libs/eigen/test/geo_alignedbox.cpp b/libs/eigen/test/geo_alignedbox.cpp index 7b1684f..e4dab32 100644 --- a/libs/eigen/test/geo_alignedbox.cpp +++ b/libs/eigen/test/geo_alignedbox.cpp @@ -211,7 +211,7 @@ MatrixType randomRotationMatrix() // https://www.isprs-ann-photogramm-remote-sens-spatial-inf-sci.net/III-7/103/2016/isprs-annals-III-7-103-2016.pdf const MatrixType rand = MatrixType::Random(); const MatrixType q = rand.householderQr().householderQ(); - const JacobiSVD svd = q.jacobiSvd(ComputeFullU | ComputeFullV); + const JacobiSVD svd(q); const typename MatrixType::Scalar det = (svd.matrixU() * svd.matrixV().transpose()).determinant(); MatrixType diag = rand.Identity(); diag(MatrixType::RowsAtCompileTime - 1, MatrixType::ColsAtCompileTime - 1) = det; diff --git a/libs/eigen/test/geo_eulerangles.cpp b/libs/eigen/test/geo_eulerangles.cpp index 693c627..bea2419 100644 --- a/libs/eigen/test/geo_eulerangles.cpp +++ b/libs/eigen/test/geo_eulerangles.cpp @@ -26,7 +26,7 @@ void verify_euler(const Matrix& ea, int i, int j, int k) VERIFY_IS_APPROX(m, mbis); /* If I==K, and ea[1]==0, then there no unique solution. */ /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */ - if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision())) ) + if((i!=k || !numext::is_exactly_zero(ea[1])) && (i == k || !internal::isApprox(abs(ea[1]), Scalar(EIGEN_PI / 2), test_precision())) ) VERIFY((ea-eabis).norm() <= test_precision()); // approx_or_less_than does not work for 0 diff --git a/libs/eigen/test/geo_orthomethods.cpp b/libs/eigen/test/geo_orthomethods.cpp index b7b6607..64b3927 100644 --- a/libs/eigen/test/geo_orthomethods.cpp +++ b/libs/eigen/test/geo_orthomethods.cpp @@ -73,8 +73,39 @@ template void orthomethods_3() // check mixed product typedef Matrix RealVector3; RealVector3 rv1 = RealVector3::Random(); - VERIFY_IS_APPROX(v1.cross(rv1.template cast()), v1.cross(rv1)); - VERIFY_IS_APPROX(rv1.template cast().cross(v1), rv1.cross(v1)); + v2 = rv1.template cast(); + VERIFY_IS_APPROX(v1.cross(v2), v1.cross(rv1)); + VERIFY_IS_APPROX(v2.cross(v1), rv1.cross(v1)); +} + +template void orthomethods_2() +{ + typedef typename NumTraits::Real RealScalar; + typedef Matrix Vector2; + typedef Matrix Vector3; + + Vector3 v30 = Vector3::Random(), + v31 = Vector3::Random(); + Vector2 v20 = v30.template head<2>(); + Vector2 v21 = v31.template head<2>(); + + VERIFY_IS_MUCH_SMALLER_THAN(v20.cross(v20), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(v21.cross(v21), Scalar(1)); + VERIFY_IS_APPROX(v20.cross(v21), v30.cross(v31).z()); + + Vector2 v20Rot90(numext::conj(-v20.y()), numext::conj(v20.x())); + VERIFY_IS_APPROX(v20.cross( v20Rot90), v20.squaredNorm()); + VERIFY_IS_APPROX(v20.cross(-v20Rot90), -v20.squaredNorm()); + Vector2 v21Rot90(numext::conj(-v21.y()), numext::conj(v21.x())); + VERIFY_IS_APPROX(v21.cross( v21Rot90), v21.squaredNorm()); + VERIFY_IS_APPROX(v21.cross(-v21Rot90), -v21.squaredNorm()); + + // check mixed product + typedef Matrix RealVector2; + RealVector2 rv21 = RealVector2::Random(); + v21 = rv21.template cast(); + VERIFY_IS_APPROX(v20.cross(v21), v20.cross(rv21)); + VERIFY_IS_APPROX(v21.cross(v20), rv21.cross(v20)); } template void orthomethods(int size=Size) @@ -118,6 +149,9 @@ template void orthomethods(int size=Size) EIGEN_DECLARE_TEST(geo_orthomethods) { for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( orthomethods_2() ); + 
CALL_SUBTEST_2( orthomethods_2() ); + CALL_SUBTEST_4( orthomethods_2 >() ); CALL_SUBTEST_1( orthomethods_3() ); CALL_SUBTEST_2( orthomethods_3() ); CALL_SUBTEST_4( orthomethods_3 >() ); diff --git a/libs/eigen/test/geo_quaternion.cpp b/libs/eigen/test/geo_quaternion.cpp index c561fc8..a821cf2 100644 --- a/libs/eigen/test/geo_quaternion.cpp +++ b/libs/eigen/test/geo_quaternion.cpp @@ -286,15 +286,13 @@ template void check_const_correctness(const PlainObjec // CMake can help with that. // verify that map-to-const don't have LvalueBit - typedef typename internal::add_const::type ConstPlainObjectType; + typedef std::add_const_t ConstPlainObjectType; VERIFY( !(internal::traits >::Flags & LvalueBit) ); VERIFY( !(internal::traits >::Flags & LvalueBit) ); VERIFY( !(Map::Flags & LvalueBit) ); VERIFY( !(Map::Flags & LvalueBit) ); } -#if EIGEN_HAS_RVALUE_REFERENCES - // Regression for bug 1573 struct MovableClass { // The following line is a workaround for gcc 4.7 and 4.8 (see bug 1573 comments). @@ -307,8 +305,6 @@ struct MovableClass { Quaternionf m_quat; }; -#endif - EIGEN_DECLARE_TEST(geo_quaternion) { for(int i = 0; i < g_repeat; i++) { diff --git a/libs/eigen/test/gpu_basic.cu b/libs/eigen/test/gpu_basic.cu index 4298da3..e424a93 100644 --- a/libs/eigen/test/gpu_basic.cu +++ b/libs/eigen/test/gpu_basic.cu @@ -138,10 +138,12 @@ struct complex_operators { out[out_idx++] = a / numext::real(b); out[out_idx++] = numext::real(a) / b; +#if !defined(EIGEN_COMP_MSVC) out[out_idx] = a; out[out_idx++] += b; out[out_idx] = a; out[out_idx++] -= b; out[out_idx] = a; out[out_idx++] *= b; out[out_idx] = a; out[out_idx++] /= b; +#endif const ComplexType true_value = ComplexType(ValueType(1), ValueType(0)); const ComplexType false_value = ComplexType(ValueType(0), ValueType(0)); @@ -188,6 +190,7 @@ struct complex_operators { res.segment(block_idx, size) = x1.real().array() / x2.array(); block_idx += size; +#if !defined(EIGEN_COMP_MSVC) res.segment(block_idx, size) = x1; res.segment(block_idx, size) += x2; block_idx += size; res.segment(block_idx, size) = x1; res.segment(block_idx, size) -= x2; @@ -196,6 +199,7 @@ struct complex_operators { block_idx += size; res.segment(block_idx, size) = x1; res.segment(block_idx, size).array() /= x2.array(); block_idx += size; +#endif const T true_vector = T::Constant(true_value); const T false_vector = T::Constant(false_value); diff --git a/libs/eigen/test/gpu_example.cu b/libs/eigen/test/gpu_example.cu new file mode 100644 index 0000000..a69f5ea --- /dev/null +++ b/libs/eigen/test/gpu_example.cu @@ -0,0 +1,129 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2021 The Eigen Team. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// The following is an example GPU test. + +#include "main.h" // Include the main test utilities. + +// Define a kernel functor. +// +// The kernel must be a POD type and implement operator(). +struct AddKernel { + // Parameters must be POD or serializable Eigen types (e.g. Matrix, + // Array). The return value must be a POD or serializable value type. + template + EIGEN_DEVICE_FUNC + Type3 operator()(const Type1& A, const Type2& B, Type3& C) const { + C = A + B; // Populate output parameter. + Type3 D = A + B; // Populate return value. + return D; + } +}; + +// Define a sub-test that uses the kernel. 
+template +void test_add(const T& type) { + const Index rows = type.rows(); + const Index cols = type.cols(); + + // Create random inputs. + const T A = T::Random(rows, cols); + const T B = T::Random(rows, cols); + T C; // Output parameter. + + // Create kernel. + AddKernel add_kernel; + + // Run add_kernel(A, B, C) via run(...). + // This will run on the GPU if using a GPU compiler, or CPU otherwise, + // facilitating generic tests that can run on either. + T D = run(add_kernel, A, B, C); + + // Check that both output parameter and return value are correctly populated. + const T expected = A + B; + VERIFY_IS_CWISE_EQUAL(C, expected); + VERIFY_IS_CWISE_EQUAL(D, expected); + + // In a GPU-only test, we can verify that the CPU and GPU produce the + // same results. + T C_cpu, C_gpu; + T D_cpu = run_on_cpu(add_kernel, A, B, C_cpu); // Runs on CPU. + T D_gpu = run_on_gpu(add_kernel, A, B, C_gpu); // Runs on GPU. + VERIFY_IS_CWISE_EQUAL(C_cpu, C_gpu); + VERIFY_IS_CWISE_EQUAL(D_cpu, D_gpu); +}; + +struct MultiplyKernel { + template + EIGEN_DEVICE_FUNC + Type3 operator()(const Type1& A, const Type2& B, Type3& C) const { + C = A * B; + return A * B; + } +}; + +template +void test_multiply(const T1& type1, const T2& type2, const T3& type3) { + const T1 A = T1::Random(type1.rows(), type1.cols()); + const T2 B = T2::Random(type2.rows(), type2.cols()); + T3 C; + + MultiplyKernel multiply_kernel; + + // The run(...) family of functions uses a memory buffer to transfer data back + // and forth to and from the device. The size of this buffer is estimated + // from the size of all input parameters. If the estimated buffer size is + // not sufficient for transferring outputs from device-to-host, then an + // explicit buffer size needs to be specified. + + // 2 outputs of size (A * B). For each matrix output, the buffer will store + // the number of rows, columns, and the data. + size_t buffer_capacity_hint = 2 * ( // 2 output parameters + 2 * sizeof(typename T3::Index) // # Rows, # Cols + + A.rows() * B.cols() * sizeof(typename T3::Scalar)); // Output data + + T3 D = run_with_hint(buffer_capacity_hint, multiply_kernel, A, B, C); + + const T3 expected = A * B; + VERIFY_IS_CWISE_APPROX(C, expected); + VERIFY_IS_CWISE_APPROX(D, expected); + + T3 C_cpu, C_gpu; + T3 D_cpu = run_on_cpu(multiply_kernel, A, B, C_cpu); + T3 D_gpu = run_on_gpu_with_hint(buffer_capacity_hint, + multiply_kernel, A, B, C_gpu); + VERIFY_IS_CWISE_APPROX(C_cpu, C_gpu); + VERIFY_IS_CWISE_APPROX(D_cpu, D_gpu); +} + +// Declare the test fixture. +EIGEN_DECLARE_TEST(gpu_example) +{ + // For the number of repeats, call the desired subtests. + for(int i = 0; i < g_repeat; i++) { + // Call subtests with different sized/typed inputs. 
+ CALL_SUBTEST( test_add(Eigen::Vector3f()) ); + CALL_SUBTEST( test_add(Eigen::Matrix3d()) ); + CALL_SUBTEST( test_add(Eigen::MatrixX(10, 10)) ); + + CALL_SUBTEST( test_add(Eigen::Array44f()) ); + CALL_SUBTEST( test_add(Eigen::ArrayXd(20)) ); + CALL_SUBTEST( test_add(Eigen::ArrayXXi(13, 17)) ); + + CALL_SUBTEST( test_multiply(Eigen::Matrix3d(), + Eigen::Matrix3d(), + Eigen::Matrix3d()) ); + CALL_SUBTEST( test_multiply(Eigen::MatrixX(10, 10), + Eigen::MatrixX(10, 10), + Eigen::MatrixX()) ); + CALL_SUBTEST( test_multiply(Eigen::MatrixXf(12, 1), + Eigen::MatrixXf(1, 32), + Eigen::MatrixXf()) ); + } +} diff --git a/libs/eigen/test/gpu_test_helper.h b/libs/eigen/test/gpu_test_helper.h new file mode 100644 index 0000000..0942466 --- /dev/null +++ b/libs/eigen/test/gpu_test_helper.h @@ -0,0 +1,476 @@ +#ifndef GPU_TEST_HELPER_H +#define GPU_TEST_HELPER_H + +#include + +#ifdef EIGEN_GPUCC +#define EIGEN_USE_GPU +#include "../unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h" +#endif // EIGEN_GPUCC + +// std::tuple cannot be used on device, and there is a bug in cuda < 9.2 that +// doesn't allow std::tuple to compile for host code either. In these cases, +// use our custom implementation. +#if defined(EIGEN_GPU_COMPILE_PHASE) || (defined(EIGEN_CUDACC) && EIGEN_CUDA_SDK_VER < 92000) +#define EIGEN_USE_CUSTOM_TUPLE 1 +#else +#define EIGEN_USE_CUSTOM_TUPLE 0 +#endif + +#if EIGEN_USE_CUSTOM_TUPLE +#include "../Eigen/src/Core/arch/GPU/Tuple.h" +#else +#include +#endif +namespace Eigen { + +namespace internal { + +// Note: cannot re-use tuple_impl, since that will cause havoc for +// tuple_test. +namespace test_detail { +// Use std::tuple on CPU, otherwise use the GPU-specific versions. +#if !EIGEN_USE_CUSTOM_TUPLE +using std::tuple; +using std::get; +using std::make_tuple; +using std::tie; +#else +using tuple_impl::tuple; +using tuple_impl::get; +using tuple_impl::make_tuple; +using tuple_impl::tie; +#endif +#undef EIGEN_USE_CUSTOM_TUPLE +} // namespace test_detail + +template +struct extract_output_indices_helper; + +/** + * Extracts a set of indices corresponding to non-const l-value reference + * output types. + * + * \internal + * \tparam N the number of types {T1, Ts...}. + * \tparam Idx the "index" to append if T1 is an output type. + * \tparam OutputIndices the current set of output indices. + * \tparam T1 the next type to consider, with index Idx. + * \tparam Ts the remaining types. + */ +template +struct extract_output_indices_helper, T1, Ts...> { + using type = typename + extract_output_indices_helper< + N - 1, Idx + 1, + typename std::conditional< + // If is a non-const l-value reference, append index. + std::is_lvalue_reference::value + && !std::is_const>::value, + std::index_sequence, + std::index_sequence >::type, + Ts...>::type; +}; + +// Base case. +template +struct extract_output_indices_helper<0, Idx, std::index_sequence > { + using type = std::index_sequence; +}; + +// Extracts a set of indices into Types... that correspond to non-const +// l-value references. +template +using extract_output_indices = typename extract_output_indices_helper, Types...>::type; + +// Helper struct for dealing with Generic functors that may return void. +struct void_helper { + struct Void {}; + + // Converts void -> Void, T otherwise. + template + using ReturnType = typename std::conditional::value, Void, T>::type; + + // Non-void return value. + template + static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC + auto call(Func&& func, Args&&... 
args) -> + std::enable_if_t::value, + decltype(func(args...))> { + return func(std::forward(args)...); + } + + // Void return value. + template + static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC + auto call(Func&& func, Args&&... args) -> + std::enable_if_t::value, + Void> { + func(std::forward(args)...); + return Void{}; + } + + // Restores the original return type, Void -> void, T otherwise. + template + static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC + std::enable_if_t::type, Void>::value, T> + restore(T&& val) { + return val; + } + + // Void case. + template + static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC + void restore(const Void&) {} +}; + +// Runs a kernel via a serialized buffer. It does this by deserializing the buffer +// to construct the arguments, calling the kernel, then re-serializing the outputs. +// The buffer contains +// [ input_buffer_size, args ] +// After the kernel call, it is then populated with +// [ output_buffer_size, output_parameters, return_value ] +// If the output_buffer_size exceeds the buffer's capacity, then only the +// output_buffer_size is populated. +template +EIGEN_DEVICE_FUNC +void run_serialized(std::index_sequence, std::index_sequence, + Kernel kernel, uint8_t* buffer, size_t capacity) { + using test_detail::get; + using test_detail::make_tuple; + using test_detail::tuple; + // Deserialize input size and inputs. + size_t input_size; + const uint8_t* read_ptr = buffer; + const uint8_t* read_end = buffer + capacity; + read_ptr = Eigen::deserialize(read_ptr, read_end, input_size); + // Create value-type instances to populate. + auto args = make_tuple(typename std::decay::type{}...); + EIGEN_UNUSED_VARIABLE(args) // Avoid NVCC compile warning. + // NVCC 9.1 requires us to spell out the template parameters explicitly. + read_ptr = Eigen::deserialize(read_ptr, read_end, get::type...>(args)...); + + // Call function, with void->Void conversion so we are guaranteed a complete + // output type. + auto result = void_helper::call(kernel, get::type...>(args)...); + + // Determine required output size. + size_t output_size = Eigen::serialize_size(capacity); + output_size += Eigen::serialize_size(get::type...>(args)...); + output_size += Eigen::serialize_size(result); + + // Always serialize required buffer size. + uint8_t* write_ptr = buffer; + uint8_t* write_end = buffer + capacity; + write_ptr = Eigen::serialize(write_ptr, write_end, output_size); + // Null `write_ptr` can be safely passed along. + // Serialize outputs if they fit in the buffer. + if (output_size <= capacity) { + // Collect outputs and result. + write_ptr = Eigen::serialize(write_ptr, write_end, get::type...>(args)...); + write_ptr = Eigen::serialize(write_ptr, write_end, result); + } +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void run_serialized(Kernel kernel, uint8_t* buffer, size_t capacity) { + run_serialized (std::make_index_sequence{}, + extract_output_indices{}, + kernel, buffer, capacity); +} + +#ifdef EIGEN_GPUCC + +// Checks for GPU errors and asserts / prints the error message. +#define GPU_CHECK(expr) \ +do { \ + gpuError_t err = expr; \ + if (err != gpuSuccess) { \ + printf("%s: %s\n", gpuGetErrorName(err), gpuGetErrorString(err)); \ + gpu_assert(false); \ + } \ +} while(0) + +// Calls run_serialized on the GPU. +template +__global__ +EIGEN_HIP_LAUNCH_BOUNDS_1024 +void run_serialized_on_gpu_meta_kernel(const Kernel kernel, uint8_t* buffer, size_t capacity) { + run_serialized(kernel, buffer, capacity); +} + +// Runs kernel(args...) on the GPU via the serialization mechanism. 
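+// (Recap of the round trip, an explanatory note not in the original patch:
+// the host serializes [input_size, args...] into a byte buffer and copies it
+// to the device; the meta-kernel deserializes, invokes the functor, and
+// overwrites the buffer with [output_size, output_args..., return_value].
+// Only arguments selected by extract_output_indices, i.e. non-const lvalue
+// references, are written back: for a functor invoked as
+// kernel(const A&, B&, C), only the B& argument is treated as an output.)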
+// +// Note: this may end up calling the kernel multiple times if the initial output +// buffer is not large enough to hold the outputs. +template +auto run_serialized_on_gpu(size_t buffer_capacity_hint, + std::index_sequence, + std::index_sequence, + Kernel kernel, Args&&... args) -> decltype(kernel(args...)) { + // Compute the required serialization buffer capacity. + // Round up input size to next power of two to give a little extra room + // for outputs. + size_t input_data_size = sizeof(size_t) + Eigen::serialize_size(args...); + + size_t capacity; + if (buffer_capacity_hint == 0) { + // Estimate as the power of two larger than the total input size. + capacity = sizeof(size_t); + while (capacity <= input_data_size) { + capacity *= 2; + } + } else { + // Use the larger of the hint and the total input size. + // Add sizeof(size_t) to the hint to account for storing the buffer capacity + // itself so the user doesn't need to think about this. + capacity = std::max(buffer_capacity_hint + sizeof(size_t), + input_data_size); + } + std::vector buffer(capacity); + + uint8_t* host_data = nullptr; + uint8_t* host_data_end = nullptr; + uint8_t* host_ptr = nullptr; + uint8_t* device_data = nullptr; + size_t output_data_size = 0; + + // Allocate buffers and copy input data. + capacity = std::max(capacity, output_data_size); + buffer.resize(capacity); + host_data = buffer.data(); + host_data_end = buffer.data() + capacity; + host_ptr = Eigen::serialize(host_data, host_data_end, input_data_size); + host_ptr = Eigen::serialize(host_ptr, host_data_end, args...); + + // Copy inputs to the device. + gpuMalloc((void**)(&device_data), capacity); + gpuMemcpy(device_data, buffer.data(), input_data_size, gpuMemcpyHostToDevice); + GPU_CHECK(gpuDeviceSynchronize()); + + // Run kernel. + #ifdef EIGEN_USE_HIP + hipLaunchKernelGGL( + HIP_KERNEL_NAME(run_serialized_on_gpu_meta_kernel), + 1, 1, 0, 0, kernel, device_data, capacity); + #else + run_serialized_on_gpu_meta_kernel<<<1,1>>>( + kernel, device_data, capacity); + #endif + // Check pre-launch and kernel execution errors. + GPU_CHECK(gpuGetLastError()); + GPU_CHECK(gpuDeviceSynchronize()); + // Copy back new output to host. + gpuMemcpy(host_data, device_data, capacity, gpuMemcpyDeviceToHost); + gpuFree(device_data); + GPU_CHECK(gpuDeviceSynchronize()); + + // Determine output buffer size. + const uint8_t* c_host_ptr = Eigen::deserialize(host_data, host_data_end, output_data_size); + // If the output doesn't fit in the buffer, print a warning and fail. + if (output_data_size > capacity) { + std::cerr << "The serialized output does not fit in the output buffer, " + << output_data_size << " vs capacity " << capacity << "." + << std::endl + << "Try specifying a minimum buffer capacity: " << std::endl + << " run_with_hint(" << output_data_size << ", ...)" + << std::endl; + VERIFY(false); + } + + // Deserialize outputs. + auto args_tuple = test_detail::tie(args...); + EIGEN_UNUSED_VARIABLE(args_tuple) // Avoid NVCC compile warning. + c_host_ptr = Eigen::deserialize(c_host_ptr, host_data_end, test_detail::get(args_tuple)...); + + // Maybe deserialize return value, properly handling void. + typename void_helper::ReturnType result; + c_host_ptr = Eigen::deserialize(c_host_ptr, host_data_end, result); + return void_helper::restore(result); +} + +#endif // EIGEN_GPUCC + +} // namespace internal + +/** + * Runs a kernel on the CPU, returning the results. + * \param kernel kernel to run. + * \param args ... input arguments. + * \return kernel(args...). 
+ */ +template +auto run_on_cpu(Kernel kernel, Args&&... args) -> decltype(kernel(args...)){ + return kernel(std::forward(args)...); +} + +#ifdef EIGEN_GPUCC + +/** + * Runs a kernel on the GPU, returning the results. + * + * The kernel must be able to be passed directly as an input to a global + * function (i.e. empty or POD). Its inputs must be "Serializable" so we + * can transfer them to the device, and the output must be a Serializable value + * type so it can be transferred back from the device. + * + * \param kernel kernel to run. + * \param args ... input arguments, must be "Serializable". + * \return kernel(args...). + */ +template +auto run_on_gpu(Kernel kernel, Args&&... args) -> decltype(kernel(args...)){ + return internal::run_serialized_on_gpu( + /*buffer_capacity_hint=*/ 0, + std::make_index_sequence{}, + internal::extract_output_indices{}, + kernel, std::forward(args)...); +} + +/** + * Runs a kernel on the GPU, returning the results. + * + * This version allows specifying a minimum buffer capacity required for + * serializing the outputs to transfer results from device to host. Use this when + * `run_on_gpu(...)` fails to determine an appropriate capacity by default. + * + * \param buffer_capacity_hint minimum required buffer size for serializing + * outputs. + * \param kernel kernel to run. + * \param args ... input arguments, must be "Serializable". + * \return kernel(args...). + * \sa run_on_gpu + */ +template +auto run_on_gpu_with_hint(size_t buffer_capacity_hint, + Kernel kernel, Args&&... args) -> decltype(kernel(args...)){ + return internal::run_serialized_on_gpu( + buffer_capacity_hint, + std::make_index_sequence{}, + internal::extract_output_indices{}, + kernel, std::forward(args)...); +} + +/** + * Kernel for determining basic Eigen compile-time information + * (i.e. the cuda/hip arch) + */ +struct CompileTimeDeviceInfoKernel { + struct Info { + int cuda; + int hip; + }; + + EIGEN_DEVICE_FUNC + Info operator()() const + { + Info info = {-1, -1}; + #if defined(__CUDA_ARCH__) + info.cuda = static_cast(__CUDA_ARCH__ +0); + #endif + #if defined(EIGEN_HIP_DEVICE_COMPILE) + info.hip = static_cast(EIGEN_HIP_DEVICE_COMPILE +0); + #endif + return info; + } +}; + +/** + * Queries and prints the compile-time and runtime GPU info. + */ +void print_gpu_device_info() +{ + int device = 0; + gpuDeviceProp_t deviceProp; + gpuGetDeviceProperties(&deviceProp, device); + + auto info = run_on_gpu(CompileTimeDeviceInfoKernel()); + + std::cout << "GPU compile-time info:\n"; + + #ifdef EIGEN_CUDACC + std::cout << " EIGEN_CUDACC: " << int(EIGEN_CUDACC) << std::endl; + #endif + + #ifdef EIGEN_CUDA_SDK_VER + std::cout << " EIGEN_CUDA_SDK_VER: " << int(EIGEN_CUDA_SDK_VER) << std::endl; + #endif + + #ifdef EIGEN_COMP_NVCC + std::cout << " EIGEN_COMP_NVCC: " << int(EIGEN_COMP_NVCC) << std::endl; + #endif + + #ifdef EIGEN_HIPCC + std::cout << " EIGEN_HIPCC: " << int(EIGEN_HIPCC) << std::endl; + #endif + + std::cout << " EIGEN_CUDA_ARCH: " << info.cuda << std::endl; + std::cout << " EIGEN_HIP_DEVICE_COMPILE: " << info.hip << std::endl; + + std::cout << "GPU device info:\n"; + std::cout << " name: " << deviceProp.name << std::endl; + std::cout << " capability: " << deviceProp.major << "." 
<< deviceProp.minor << std::endl; + std::cout << " multiProcessorCount: " << deviceProp.multiProcessorCount << std::endl; + std::cout << " maxThreadsPerMultiProcessor: " << deviceProp.maxThreadsPerMultiProcessor << std::endl; + std::cout << " warpSize: " << deviceProp.warpSize << std::endl; + std::cout << " regsPerBlock: " << deviceProp.regsPerBlock << std::endl; + std::cout << " concurrentKernels: " << deviceProp.concurrentKernels << std::endl; + std::cout << " clockRate: " << deviceProp.clockRate << std::endl; + std::cout << " canMapHostMemory: " << deviceProp.canMapHostMemory << std::endl; + std::cout << " computeMode: " << deviceProp.computeMode << std::endl; +} + +#endif // EIGEN_GPUCC + +/** + * Runs a kernel on the GPU (if EIGEN_GPUCC), or CPU otherwise. + * + * This is to better support creating generic tests. + * + * The kernel must be able to be passed directly as an input to a global + * function (i.e. empty or POD). Its inputs must be "Serializable" so we + * can transfer them to the device, and the output must be a Serializable value + * type so it can be transferred back from the device. + * + * \param kernel kernel to run. + * \param args ... input arguments, must be "Serializable". + * \return kernel(args...). + */ +template +auto run(Kernel kernel, Args&&... args) -> decltype(kernel(args...)){ +#ifdef EIGEN_GPUCC + return run_on_gpu(kernel, std::forward(args)...); +#else + return run_on_cpu(kernel, std::forward(args)...); +#endif +} + +/** + * Runs a kernel on the GPU (if EIGEN_GPUCC), or CPU otherwise. + * + * This version allows specifying a minimum buffer capacity required for + * serializing the outputs to transfer results from device to host. Use this when + * `run(...)` fails to determine an appropriate capacity by default. + * + * \param buffer_capacity_hint minimum required buffer size for serializing + * outputs. + * \param kernel kernel to run. + * \param args ... input arguments, must be "Serializable". + * \return kernel(args...). + * \sa run + */ +template +auto run_with_hint(size_t buffer_capacity_hint, + Kernel kernel, Args&&... args) -> decltype(kernel(args...)){ +#ifdef EIGEN_GPUCC + return run_on_gpu_with_hint(buffer_capacity_hint, kernel, std::forward(args)...); +#else + EIGEN_UNUSED_VARIABLE(buffer_capacity_hint) + return run_on_cpu(kernel, std::forward(args)...); +#endif +} + +} // namespace Eigen + +#endif // GPU_TEST_HELPER_H diff --git a/libs/eigen/test/half_float.cpp b/libs/eigen/test/half_float.cpp index 729de1b..00a8b48 100644 --- a/libs/eigen/test/half_float.cpp +++ b/libs/eigen/test/half_float.cpp @@ -157,6 +157,12 @@ void test_numtraits() VERIFY( (std::numeric_limits::denorm_min)() > half(0.f) ); VERIFY( (std::numeric_limits::min)()/half(2) > half(0.f) ); VERIFY_IS_EQUAL( (std::numeric_limits::denorm_min)()/half(2), half(0.f) ); + + // Test to see that we are able to link against the symbols for digits and + // digits10. 
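+  // (Explanatory note, not in the original patch: binding a reference
+  // ODR-uses the numeric_limits member, so the link fails if the symbol is
+  // not actually defined somewhere, and volatile keeps the compiler from
+  // constant-folding the reads away before the comparison below.)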
+ volatile const int& digits10 = std::numeric_limits::digits10; + volatile const int& digits = std::numeric_limits::digits; + VERIFY( (digits10) != (digits) ); } void test_arithmetic() @@ -224,6 +230,8 @@ void test_comparison() void test_basic_functions() { + constexpr float PI = static_cast(EIGEN_PI); + VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); VERIFY_IS_EQUAL(float(abs(half(3.5f))), 3.5f); VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); @@ -251,8 +259,8 @@ void test_basic_functions() VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); VERIFY_IS_EQUAL(float(exp(half(0.0f))), 1.0f); - VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); - VERIFY_IS_APPROX(float(exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); + VERIFY_IS_APPROX(float(numext::exp(half(PI))), 20.f + PI); + VERIFY_IS_APPROX(float(exp(half(PI))), 20.f + PI); VERIFY_IS_EQUAL(float(numext::expm1(half(0.0f))), 0.0f); VERIFY_IS_EQUAL(float(expm1(half(0.0f))), 0.0f); @@ -277,25 +285,26 @@ void test_basic_functions() void test_trigonometric_functions() { + constexpr float PI = static_cast(EIGEN_PI); VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f))); VERIFY_IS_APPROX(cos(half(0.0f)), half(cosf(0.0f))); - VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI))); - // VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2))); - // VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::cos(half(PI)), half(cosf(PI))); + // VERIFY_IS_APPROX(numext::cos(half(PI/2)), half(cosf(PI/2))); + // VERIFY_IS_APPROX(numext::cos(half(3*PI/2)), half(cosf(3*PI/2))); VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f))); VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f))); VERIFY_IS_APPROX(sin(half(0.0f)), half(sinf(0.0f))); - // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI))); - VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2))); - VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2))); + // VERIFY_IS_APPROX(numext::sin(half(PI)), half(sinf(PI))); + VERIFY_IS_APPROX(numext::sin(half(PI/2)), half(sinf(PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3*PI/2)), half(sinf(3*PI/2))); VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f))); VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f))); VERIFY_IS_APPROX(tan(half(0.0f)), half(tanf(0.0f))); - // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI))); - // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2))); - //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2))); + // VERIFY_IS_APPROX(numext::tan(half(PI)), half(tanf(PI))); + // VERIFY_IS_APPROX(numext::tan(half(PI/2)), half(tanf(PI/2))); + //VERIFY_IS_APPROX(numext::tan(half(3*PI/2)), half(tanf(3*PI/2))); VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f))); } diff --git a/libs/eigen/test/householder.cpp b/libs/eigen/test/householder.cpp index cad8138..3a3d047 100644 --- a/libs/eigen/test/householder.cpp +++ b/libs/eigen/test/householder.cpp @@ -30,7 +30,7 @@ template void householder(const MatrixType& m) typedef Matrix TMatrixType; - Matrix _tmp((std::max)(rows,cols)); + Matrix _tmp((std::max)(rows,cols)); Scalar* tmp = &_tmp.coeffRef(0,0); Scalar beta; @@ -133,6 +133,89 @@ template void householder(const MatrixType& m) VERIFY_IS_APPROX(m3 * m5, m1); // test evaluating rhseq to a dense matrix, then applying } + +template +void householder_update(const MatrixType& m) { + // This 
test is covering the internal::householder_qr_inplace_update function. + // At time of writing, there is no public API that exposes this update behavior directly, + // so we are testing the internal implementation. + + const Index rows = m.rows(); + const Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + typedef Matrix HCoeffsVectorType; + typedef Matrix MatrixX; + typedef Matrix VectorX; + + VectorX tmpOwner(cols); + Scalar* tmp = tmpOwner.data(); + + // The matrix to factorize. + const MatrixType A = MatrixType::Random(rows, cols); + + // matQR and hCoeffs will hold the factorization of A, + // built by a sequence of calls to `update`. + MatrixType matQR(rows, cols); + HCoeffsVectorType hCoeffs(cols); + + // householder_qr_inplace_update should be able to build a QR factorization one column at a time. + // We verify this by starting with an empty factorization and 'updating' one column at a time. + // After each call to update, we should have a QR factorization of the columns presented so far. + + const Index size = (std::min)(rows, cols); // QR can only go up to 'size' because that's full rank. + for (Index k = 0; k != size; ++k) + { + // Make a copy of the column to prevent any possibility of 'leaking' other parts of A. + const VectorType newColumn = A.col(k); + internal::householder_qr_inplace_update(matQR, hCoeffs, newColumn, k, tmp); + + // Verify Property: + // matQR.leftCols(k+1) and hCoeffs.head(k+1) hold + // a QR factorization of A.leftCols(k+1). + // This is the fundamental guarantee of householder_qr_inplace_update. + { + const MatrixX matQR_k = matQR.leftCols(k + 1); + const VectorX hCoeffs_k = hCoeffs.head(k + 1); + MatrixX R = matQR_k.template triangularView(); + MatrixX QxR = householderSequence(matQR_k, hCoeffs_k.conjugate()) * R; + VERIFY_IS_APPROX(QxR, A.leftCols(k + 1)); + } + + // Verify Property: + // A sequence of calls to 'householder_qr_inplace_update' + // should produce the same result as 'householder_qr_inplace_unblocked'. + // This is a property of the current implementation. + // If these implementations diverge in the future, + // then simply delete the test of this property. + { + MatrixX QR_at_once = A.leftCols(k + 1); + VectorX hCoeffs_at_once(k + 1); + internal::householder_qr_inplace_unblocked(QR_at_once, hCoeffs_at_once, tmp); + VERIFY_IS_APPROX(QR_at_once, matQR.leftCols(k + 1)); + VERIFY_IS_APPROX(hCoeffs_at_once, hCoeffs.head(k + 1)); + } + } + + // Verify Property: + // We can go back and update any column to have a new value, + // and get a QR factorization of the columns up to that one. 
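+  // (How the update works, in brief; explanatory note not in the original
+  // patch: the reflectors already stored in columns 0..k-1 are applied to
+  // the incoming column, then a fresh Householder reflector is computed from
+  // its trailing entries to zero everything below the diagonal, so only
+  // column k of matQR and hCoeffs(k) change.)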
+ { + const Index k = internal::random(0, size - 1); + VectorType newColumn = VectorType::Random(rows); + internal::householder_qr_inplace_update(matQR, hCoeffs, newColumn, k, tmp); + + const MatrixX matQR_k = matQR.leftCols(k + 1); + const VectorX hCoeffs_k = hCoeffs.head(k + 1); + MatrixX R = matQR_k.template triangularView(); + MatrixX QxR = householderSequence(matQR_k, hCoeffs_k.conjugate()) * R; + VERIFY_IS_APPROX(QxR.leftCols(k), A.leftCols(k)); + VERIFY_IS_APPROX(QxR.col(k), newColumn); + } +} + + EIGEN_DECLARE_TEST(householder) { for(int i = 0; i < g_repeat; i++) { @@ -144,5 +227,9 @@ EIGEN_DECLARE_TEST(householder) CALL_SUBTEST_6( householder(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_7( householder(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_8( householder(Matrix()) ); + + CALL_SUBTEST_9( householder_update(Matrix()) ); + CALL_SUBTEST_9( householder_update(Matrix()) ); + CALL_SUBTEST_9( householder_update(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } } diff --git a/libs/eigen/test/indexed_view.cpp b/libs/eigen/test/indexed_view.cpp index 72c54af..d149960 100644 --- a/libs/eigen/test/indexed_view.cpp +++ b/libs/eigen/test/indexed_view.cpp @@ -7,38 +7,15 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifdef EIGEN_TEST_PART_2 -// Make sure we also check c++11 max implementation -#define EIGEN_MAX_CPP_VER 11 -#endif - -#ifdef EIGEN_TEST_PART_3 -// Make sure we also check c++98 max implementation -#define EIGEN_MAX_CPP_VER 03 - -// We need to disable this warning when compiling with c++11 while limiting Eigen to c++98 -// Ideally we would rather configure the compiler to build in c++98 mode but this needs -// to be done at the CMakeLists.txt level. 
-#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) - #pragma GCC diagnostic ignored "-Wdeprecated" -#endif - -#if defined(__GNUC__) && (__GNUC__ >=9) - #pragma GCC diagnostic ignored "-Wdeprecated-copy" -#endif -#if defined(__clang__) && (__clang_major__ >= 10) - #pragma clang diagnostic ignored "-Wdeprecated-copy" -#endif - -#endif - #include #include #include "main.h" -#if EIGEN_HAS_CXX11 +using Eigen::placeholders::all; +using Eigen::placeholders::last; +using Eigen::placeholders::lastp1; +using Eigen::placeholders::lastN; #include -#endif typedef std::pair IndexPair; @@ -63,7 +40,7 @@ bool match(const T& xpr, std::string ref, std::string str_xpr = "") { #define MATCH(X,R) match(X, R, #X) template -typename internal::enable_if::value,bool>::type +std::enable_if_t::value,bool> is_same_eq(const T1& a, const T2& b) { return (a == b).all(); @@ -82,7 +59,7 @@ bool is_same_seq(const T1& a, const T2& b) } template -typename internal::enable_if::value,bool>::type +std::enable_if_t::value,bool> is_same_seq_type(const T1& a, const T2& b) { return is_same_seq(a,b); @@ -102,11 +79,7 @@ void check_indexed_view() ArrayXd a = ArrayXd::LinSpaced(n,0,n-1); Array b = a.transpose(); - #if EIGEN_COMP_CXXVER>=14 ArrayXXi A = ArrayXXi::NullaryExpr(n,n, std::ref(encode)); - #else - ArrayXXi A = ArrayXXi::NullaryExpr(n,n, std::ptr_fun(&encode)); - #endif for(Index i=0; i(5),fix<-2>), seqN(2,fix<5>,fix<-2>()) ) ); VERIFY( is_same_seq_type( seq(2,fix<5>), seqN(2,4) ) ); -#if EIGEN_HAS_CXX11 VERIFY( is_same_seq_type( seq(fix<2>,fix<5>), seqN(fix<2>,fix<4>) ) ); VERIFY( is_same_seq( seqN(2,std::integral_constant(),std::integral_constant()), seqN(2,fix<5>,fix<-2>()) ) ); VERIFY( is_same_seq( seq(std::integral_constant(),std::integral_constant(),std::integral_constant()), @@ -231,10 +203,6 @@ void check_indexed_view() VERIFY( is_same_seq_type( seqN(2,std::integral_constant()), seqN(2,fix<5>) ) ); VERIFY( is_same_seq_type( seq(std::integral_constant(),std::integral_constant()), seq(fix<1>,fix<5>) ) ); -#else - // sorry, no compile-time size recovery in c++98/03 - VERIFY( is_same_seq( seq(fix<2>,fix<5>), seqN(fix<2>,fix<4>) ) ); -#endif VERIFY( (A(seqN(2,fix<5>), 5)).RowsAtCompileTime == 5); VERIFY( (A(4, all)).ColsAtCompileTime == Dynamic); @@ -310,7 +278,6 @@ void check_indexed_view() A(seq(last-5,last-1,2), seqN(last-3,3,fix<-2>)).reverse() ); } -#if EIGEN_HAS_CXX11 // check lastN VERIFY_IS_APPROX( a(lastN(3)), a.tail(3) ); VERIFY( MATCH( a(lastN(3)), "7\n8\n9" ) ); @@ -323,7 +290,6 @@ void check_indexed_view() VERIFY_IS_APPROX( (A(std::array{{1,3,5}}, std::array{{9,6,3,0}})), A(seqN(1,3,2), seqN(9,4,-3)) ); -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE VERIFY_IS_APPROX( A({3, 1, 6, 5}, all), A(std::array{{3, 1, 6, 5}}, all) ); VERIFY_IS_APPROX( A(all,{3, 1, 6, 5}), A(all,std::array{{3, 1, 6, 5}}) ); VERIFY_IS_APPROX( A({1,3,5},{3, 1, 6, 5}), A(std::array{{1,3,5}},std::array{{3, 1, 6, 5}}) ); @@ -336,9 +302,6 @@ void check_indexed_view() VERIFY_IS_APPROX( b({3, 1, 6, 5}), b(std::array{{3, 1, 6, 5}}) ); VERIFY_IS_EQUAL( b({1,3,5}).SizeAtCompileTime, 3 ); -#endif - -#endif // check mat(i,j) with weird types for i and j { @@ -396,13 +359,11 @@ void check_indexed_view() a(XX) = 1; A(XX,YY) = 1; // Anonymous enums only work with C++11 -#if EIGEN_HAS_CXX11 enum { X=0, Y=1 }; a(X) = 1; A(X,Y) = 1; A(XX,Y) = 1; A(X,YY) = 1; -#endif // Check compilation of varying integer types as index types: Index i = n/2; @@ -442,13 +403,21 @@ void check_indexed_view() VERIFY( MATCH( A(all,1)(1), "101")); } -#if 
EIGEN_HAS_CXX11 + // bug #2375: indexing over matrices of dim >128 should compile on gcc + { + Matrix large_mat = Matrix::Random(); + std::array test_indices = {0, 1}; + Matrix thin_slice = large_mat(all, test_indices); + for(int col = 0; col < int(test_indices.size()); ++col) + for(int row = 0; row < large_mat.rows(); ++row) + VERIFY_IS_EQUAL( thin_slice(row, col), large_mat(row, col) ); + } + //Bug IndexView with a single static row should be RowMajor: { // A(1, seq(0,2,1)).cwiseAbs().colwise().replicate(2).eval(); STATIC_CHECK(( (internal::evaluator::Flags & RowMajorBit) == RowMajorBit )); } -#endif } @@ -456,8 +425,6 @@ EIGEN_DECLARE_TEST(indexed_view) { // for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( check_indexed_view() ); - CALL_SUBTEST_2( check_indexed_view() ); - CALL_SUBTEST_3( check_indexed_view() ); // } // static checks of some internals: diff --git a/libs/eigen/test/initializer_list_construction.cpp b/libs/eigen/test/initializer_list_construction.cpp index 7a9c49e..b576ec2 100644 --- a/libs/eigen/test/initializer_list_construction.cpp +++ b/libs/eigen/test/initializer_list_construction.cpp @@ -7,7 +7,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#define EIGEN_NO_STATIC_ASSERT +#if defined(__GNUC__) && __GNUC__ >= 10 +// GCC 10+ has a bug for unsigned char that thinks we're writing past the +// end of an array when compiled with -O3. This warning is not triggered for +// any other types, nor for other compilers, nor for other optimization levels. +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif #include "main.h" @@ -320,16 +325,6 @@ template void dynamicVectorConstruction() VERIFY(v.cols() == 1); VERIFY_IS_EQUAL(v, (VectorX {{raw[0], raw[1], raw[2], raw[3]}})); } - - { - VERIFY_RAISES_ASSERT((VectorX {raw[0], raw[1], raw[2], raw[3]})); - } - { - VERIFY_RAISES_ASSERT((VectorX { - {raw[0], raw[1], raw[2], raw[3]}, - {raw[0], raw[1], raw[2], raw[3]}, - })); - } } EIGEN_DECLARE_TEST(initializer_list_construction) diff --git a/libs/eigen/test/integer_types.cpp b/libs/eigen/test/integer_types.cpp index 31f4100..1322527 100644 --- a/libs/eigen/test/integer_types.cpp +++ b/libs/eigen/test/integer_types.cpp @@ -7,8 +7,6 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#define EIGEN_NO_STATIC_ASSERT - #include "main.h" #undef VERIFY_IS_APPROX @@ -162,12 +160,10 @@ EIGEN_DECLARE_TEST(integer_types) CALL_SUBTEST_6( integer_type_tests(Matrix()) ); -#if EIGEN_HAS_CXX11 CALL_SUBTEST_7( integer_type_tests(Matrix()) ); CALL_SUBTEST_7( signed_integer_type_tests(Matrix()) ); CALL_SUBTEST_8( integer_type_tests(Matrix(1, 5)) ); -#endif } CALL_SUBTEST_9( integer_types_extra<0>() ); } diff --git a/libs/eigen/test/inverse.cpp b/libs/eigen/test/inverse.cpp index 9cedfa1..2748c38 100644 --- a/libs/eigen/test/inverse.cpp +++ b/libs/eigen/test/inverse.cpp @@ -12,12 +12,12 @@ #include template -void inverse_for_fixed_size(const MatrixType&, typename internal::enable_if::type* = 0) +void inverse_for_fixed_size(const MatrixType&, std::enable_if_t* = 0) { } template -void inverse_for_fixed_size(const MatrixType& m1, typename internal::enable_if::type* = 0) +void inverse_for_fixed_size(const MatrixType& m1, std::enable_if_t* = 0) { using std::abs; diff --git a/libs/eigen/test/jacobi.cpp b/libs/eigen/test/jacobi.cpp index 5604797..273b94d 100644 --- a/libs/eigen/test/jacobi.cpp +++ b/libs/eigen/test/jacobi.cpp @@ -65,6 +65,11 @@ EIGEN_DECLARE_TEST(jacobi) CALL_SUBTEST_3(( jacobi() )); CALL_SUBTEST_3(( jacobi >() )); + CALL_SUBTEST_1(( jacobi, float>() )); + CALL_SUBTEST_2(( jacobi, double>() )); + CALL_SUBTEST_3(( jacobi, 4, 4, RowMajor>, float>() )); + CALL_SUBTEST_3(( jacobi, 4, 4, RowMajor>, std::complex >() )); + int r = internal::random(2, internal::random(1,EIGEN_TEST_MAX_SIZE)/2), c = internal::random(2, internal::random(1,EIGEN_TEST_MAX_SIZE)/2); CALL_SUBTEST_4(( jacobi(MatrixXf(r,c)) )); diff --git a/libs/eigen/test/jacobisvd.cpp b/libs/eigen/test/jacobisvd.cpp index 5b15c5a..daf24a7 100644 --- a/libs/eigen/test/jacobisvd.cpp +++ b/libs/eigen/test/jacobisvd.cpp @@ -8,6 +8,15 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// We explicitly disable deprecated declarations for this set of tests +// because we purposely verify assertions for the deprecated SVD runtime +// option behavior. 
+#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) +#pragma warning( disable : 4996 ) +#endif + // discard stack allocation as that too bypasses malloc #define EIGEN_STACK_ALLOCATION_LIMIT 0 #define EIGEN_RUNTIME_NO_MALLOC @@ -16,49 +25,9 @@ #define SVD_DEFAULT(M) JacobiSVD #define SVD_FOR_MIN_NORM(M) JacobiSVD +#define SVD_STATIC_OPTIONS(M, O) JacobiSVD #include "svd_common.h" -// Check all variants of JacobiSVD -template -void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true) -{ - MatrixType m = a; - if(pickrandom) - svd_fill_random(m); - - CALL_SUBTEST(( svd_test_all_computation_options >(m, true) )); // check full only - CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); - CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); - if(m.rows()==m.cols()) - CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); -} - -template void jacobisvd_verify_assert(const MatrixType& m) -{ - svd_verify_assert >(m); - svd_verify_assert >(m, true); - svd_verify_assert >(m); - svd_verify_assert >(m); - Index rows = m.rows(); - Index cols = m.cols(); - - enum { - ColsAtCompileTime = MatrixType::ColsAtCompileTime - }; - - - MatrixType a = MatrixType::Zero(rows, cols); - a.setZero(); - - if (ColsAtCompileTime == Dynamic) - { - JacobiSVD svd_fullqr; - VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV)) - VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV)) - VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV)) - } -} - template void jacobisvd_method() { @@ -69,11 +38,62 @@ void jacobisvd_method() VERIFY_IS_APPROX(m.jacobiSvd().singularValues(), RealVecType::Ones()); VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixU()); VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixV()); + VERIFY_IS_APPROX(m.template jacobiSvd().solve(m), m); + VERIFY_IS_APPROX(m.template jacobiSvd().transpose().solve(m), m); + VERIFY_IS_APPROX(m.template jacobiSvd().adjoint().solve(m), m); + + // Deprecated behavior. VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m); VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).transpose().solve(m), m); VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).adjoint().solve(m), m); } +template +void jacobisvd_all_options(const MatrixType& input = MatrixType()) { + MatrixType m(input.rows(), input.cols()); + svd_fill_random(m); + svd_option_checks(m); + svd_option_checks(m); + svd_option_checks(m); + svd_option_checks_full_only( + m); // FullPiv only used when computing full unitaries +} + +template +void jacobisvd_verify_assert(const MatrixType& m = MatrixType()) { + svd_verify_assert(m); + svd_verify_assert(m); + svd_verify_assert(m); + svd_verify_assert_full_only(m); + + svd_verify_constructor_options_assert>(m); + svd_verify_constructor_options_assert>(m); + svd_verify_constructor_options_assert>(m); + svd_verify_constructor_options_assert>(m, true); +} + +template +void jacobisvd_verify_inputs(const MatrixType& m = MatrixType()) { + // check defaults + typedef JacobiSVD DefaultSVD; + DefaultSVD defaultSvd(m); + VERIFY((int)DefaultSVD::QRPreconditioner == (int)ColPivHouseholderQRPreconditioner); + VERIFY(!defaultSvd.computeU()); + VERIFY(!defaultSvd.computeV()); + + // ColPivHouseholderQR is always default in presence of other options. 
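+  // (Illustrative sketch, not part of the original patch: with the
+  // compile-time Options parameter the computation choices move into the
+  // type. Assuming MatrixXf m and VectorXf b:
+  // \code
+  //   JacobiSVD<MatrixXf, ComputeThinU | ComputeThinV> svd(m);
+  //   VectorXf x = svd.solve(b);
+  // \endcode
+  // The deprecated form passed ComputeThinU | ComputeThinV to the
+  // constructor at runtime instead.)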
+ VERIFY(((int)JacobiSVD::QRPreconditioner == (int)ColPivHouseholderQRPreconditioner)); + VERIFY(((int)JacobiSVD::QRPreconditioner == (int)ColPivHouseholderQRPreconditioner)); + VERIFY(((int)JacobiSVD::QRPreconditioner == + (int)ColPivHouseholderQRPreconditioner)); + VERIFY(((int)JacobiSVD::QRPreconditioner == + (int)ColPivHouseholderQRPreconditioner)); + VERIFY(((int)JacobiSVD::QRPreconditioner == + (int)ColPivHouseholderQRPreconditioner)); + VERIFY(((int)JacobiSVD::QRPreconditioner == + (int)ColPivHouseholderQRPreconditioner)); +} + namespace Foo { // older compiler require a default constructor for Bar // cf: https://stackoverflow.com/questions/7411515/ @@ -86,62 +106,91 @@ void msvc_workaround() { const Foo::Bar a; const Foo::Bar b; - std::max EIGEN_NOT_A_MACRO (a,b); + const Foo::Bar c = std::max EIGEN_NOT_A_MACRO (a,b); + EIGEN_UNUSED_VARIABLE(c) } EIGEN_DECLARE_TEST(jacobisvd) { - CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) )); - CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) )); - CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) )); - CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) )); - - CALL_SUBTEST_11(svd_all_trivial_2x2(jacobisvd)); - CALL_SUBTEST_12(svd_all_trivial_2x2(jacobisvd)); + CALL_SUBTEST_1((jacobisvd_verify_inputs())); + CALL_SUBTEST_1((jacobisvd_verify_inputs(Matrix(5, 6)))); + CALL_SUBTEST_1((jacobisvd_verify_inputs, 7, 5>>())); - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_3(( jacobisvd() )); - CALL_SUBTEST_4(( jacobisvd() )); - CALL_SUBTEST_5(( jacobisvd >() )); - CALL_SUBTEST_6(( jacobisvd >(Matrix(10,2)) )); + CALL_SUBTEST_2((jacobisvd_verify_assert())); + CALL_SUBTEST_2((jacobisvd_verify_assert())); + CALL_SUBTEST_2((jacobisvd_verify_assert>())); + CALL_SUBTEST_2((jacobisvd_verify_assert>())); + CALL_SUBTEST_2((jacobisvd_verify_assert(MatrixXf(10, 12)))); + CALL_SUBTEST_2((jacobisvd_verify_assert(MatrixXcd(7, 5)))); + CALL_SUBTEST_3(svd_all_trivial_2x2(jacobisvd_all_options)); + CALL_SUBTEST_4(svd_all_trivial_2x2(jacobisvd_all_options)); + + for (int i = 0; i < g_repeat; i++) { int r = internal::random(1, 30), c = internal::random(1, 30); TEST_SET_BUT_UNUSED_VARIABLE(r) TEST_SET_BUT_UNUSED_VARIABLE(c) - CALL_SUBTEST_10(( jacobisvd(MatrixXd(r,c)) )); - CALL_SUBTEST_7(( jacobisvd(MatrixXf(r,c)) )); - CALL_SUBTEST_8(( jacobisvd(MatrixXcd(r,c)) )); - (void) r; - (void) c; + CALL_SUBTEST_5((jacobisvd_all_options())); + CALL_SUBTEST_6((jacobisvd_all_options())); + CALL_SUBTEST_7((jacobisvd_all_options>())); + CALL_SUBTEST_8((jacobisvd_all_options>())); + CALL_SUBTEST_9((jacobisvd_all_options>())); + CALL_SUBTEST_10((jacobisvd_all_options>(Matrix(r, 5)))); + CALL_SUBTEST_11((jacobisvd_all_options>(Matrix(5, c)))); + CALL_SUBTEST_12((jacobisvd_all_options(MatrixXf(r, c)))); + CALL_SUBTEST_13((jacobisvd_all_options(MatrixXcd(r, c)))); + CALL_SUBTEST_14((jacobisvd_all_options(MatrixXd(r, c)))); + CALL_SUBTEST_15((jacobisvd_all_options>())); + CALL_SUBTEST_16((jacobisvd_all_options>())); + + MatrixXcd noQRTest = MatrixXcd(r, r); + svd_fill_random(noQRTest); + CALL_SUBTEST_17((svd_option_checks(noQRTest))); + + CALL_SUBTEST_18(( + svd_check_max_size_matrix, ColPivHouseholderQRPreconditioner>( + r, c))); + CALL_SUBTEST_18( + (svd_check_max_size_matrix, HouseholderQRPreconditioner>(r, + c))); + CALL_SUBTEST_18(( + svd_check_max_size_matrix, ColPivHouseholderQRPreconditioner>( + r, c))); + CALL_SUBTEST_18( + (svd_check_max_size_matrix, HouseholderQRPreconditioner>(r, + c))); // Test on inf/nan matrix - CALL_SUBTEST_7( (svd_inf_nan, 
MatrixXf>()) ); - CALL_SUBTEST_10( (svd_inf_nan, MatrixXd>()) ); + CALL_SUBTEST_19((svd_inf_nan())); + CALL_SUBTEST_19((svd_inf_nan())); - // bug1395 test compile-time vectors as input - CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix()) )); - CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix()) )); - CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix(r)) )); - CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix(c)) )); + CALL_SUBTEST_20((jacobisvd_verify_assert>())); + CALL_SUBTEST_20((jacobisvd_verify_assert>())); + CALL_SUBTEST_20((jacobisvd_verify_assert>(Matrix(r)))); + CALL_SUBTEST_20((jacobisvd_verify_assert>(Matrix(c)))); } - CALL_SUBTEST_7(( jacobisvd(MatrixXf(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); - CALL_SUBTEST_8(( jacobisvd(MatrixXcd(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) )); + CALL_SUBTEST_21((jacobisvd_all_options( + MatrixXd(internal::random(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2), + internal::random(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2))))); + CALL_SUBTEST_22((jacobisvd_all_options( + MatrixXcd(internal::random(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3), + internal::random(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3))))); // test matrixbase method - CALL_SUBTEST_1(( jacobisvd_method() )); - CALL_SUBTEST_3(( jacobisvd_method() )); + CALL_SUBTEST_23(( jacobisvd_method() )); + CALL_SUBTEST_23(( jacobisvd_method() )); // Test problem size constructors - CALL_SUBTEST_7( JacobiSVD(10,10) ); + CALL_SUBTEST_24( JacobiSVD(10,10) ); // Check that preallocation avoids subsequent mallocs - CALL_SUBTEST_9( svd_preallocate() ); + CALL_SUBTEST_25( svd_preallocate() ); - CALL_SUBTEST_2( svd_underoverflow() ); + CALL_SUBTEST_26( svd_underoverflow() ); msvc_workaround(); } diff --git a/libs/eigen/test/main.h b/libs/eigen/test/main.h index 07f3794..a52da9e 100644 --- a/libs/eigen/test/main.h +++ b/libs/eigen/test/main.h @@ -1,4 +1,3 @@ - // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // @@ -23,7 +22,7 @@ // The following includes of STL headers have to be done _before_ the // definition of macros min() and max(). The reason is that many STL // implementations will not work properly as the min and max symbols collide -// with the STL functions std:min() and std::max(). The STL headers may check +// with the STL functions std::min() and std::max(). The STL headers may check // for the macro definition of min/max and issue a warning or undefine the // macros. // @@ -54,27 +53,41 @@ #include #endif #endif +#if __cplusplus > 201703L +// libstdc++ 9's indirectly uses max() via . +// libstdc++ 10's indirectly uses max() via ranges headers. +#include +// libstdc++ 11's indirectly uses max() via semaphore headers. +#include +#endif -// Same for cuda_fp16.h -#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) - // Means the compiler is either nvcc or clang with CUDA enabled +// Configure GPU. 
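A sketch of the intended outcome of this block; the header names are my assumption for the elided includes. HIP builds define EIGEN_HIPCC and pull in the HIP runtime headers, CUDA builds define EIGEN_CUDACC and pull in the CUDA ones (plus fp16 support from CUDA 7.5 on), and in either case the harness then disables long double tests and narrows the dense index type to int so host and device agree:

    #if defined(EIGEN_HIPCC)
      // hipcc path, e.g. <hip/hip_runtime.h>
    #elif defined(EIGEN_CUDACC)
      // nvcc / clang-cuda path, e.g. <cuda_runtime.h>, <cuda_fp16.h> when CUDA >= 7.5
    #endif

    // Test-side consequence (sketch): indices are 32-bit on GPU builds, since
    // EIGEN_DEFAULT_DENSE_INDEX_TYPE is set to int before Eigen is included.
    #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
      static_assert(sizeof(Eigen::Index) == sizeof(int), "narrowed for device/host agreement");
    #endif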
+#if defined(EIGEN_USE_HIP) + #if defined(__HIPCC__) && !defined(EIGEN_NO_HIP) + #define EIGEN_HIPCC __HIPCC__ + #include + #include + #endif +#elif defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) #define EIGEN_CUDACC __CUDACC__ + #include + #include + #include + #if CUDA_VERSION >= 7050 + #include + #endif #endif -#if defined(EIGEN_CUDACC) -#include - #define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10) -#else - #define EIGEN_CUDA_SDK_VER 0 -#endif -#if EIGEN_CUDA_SDK_VER >= 70500 -#include + +#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) + #define EIGEN_TEST_NO_LONGDOUBLE + #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #endif // To test that all calls from Eigen code to std::min() and std::max() are // protected by parenthesis against macro expansion, the min()/max() macros // are defined here and any not-parenthesized min/max call will cause a // compiler error. -#if !defined(__HIPCC__) && !defined(EIGEN_USE_SYCL) +#if !defined(__HIPCC__) && !defined(EIGEN_USE_SYCL) && !defined(EIGEN_POCKETFFT_DEFAULT) // // HIP header files include the following files // @@ -289,9 +302,8 @@ namespace Eigen #endif //EIGEN_EXCEPTIONS #elif !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(SYCL_DEVICE_ONLY) // EIGEN_DEBUG_ASSERTS - // see bug 89. The copy_bool here is working around a bug in gcc <= 4.3 #define eigen_assert(a) \ - if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\ + if( (!(a)) && (!no_more_assert) ) \ { \ Eigen::no_more_assert = true; \ if(report_on_cerr_on_assert_failure) \ @@ -314,36 +326,10 @@ namespace Eigen #endif // EIGEN_EXCEPTIONS #endif // EIGEN_DEBUG_ASSERTS - #if defined(TEST_CHECK_STATIC_ASSERTIONS) && defined(EIGEN_EXCEPTIONS) - #define EIGEN_STATIC_ASSERT(a,MSG) \ - if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\ - { \ - Eigen::no_more_assert = true; \ - if(report_on_cerr_on_assert_failure) \ - eigen_plain_assert((a) && #MSG); \ - else \ - EIGEN_THROW_X(Eigen::eigen_static_assert_exception()); \ - } - #define VERIFY_RAISES_STATIC_ASSERT(a) { \ - Eigen::no_more_assert = false; \ - Eigen::report_on_cerr_on_assert_failure = false; \ - try { \ - a; \ - VERIFY(Eigen::should_raise_an_assert && # a); \ - } \ - catch (Eigen::eigen_static_assert_exception&) { VERIFY(true); } \ - Eigen::report_on_cerr_on_assert_failure = true; \ - } - #endif // TEST_CHECK_STATIC_ASSERTIONS - #ifndef VERIFY_RAISES_ASSERT #define VERIFY_RAISES_ASSERT(a) \ std::cout << "Can't VERIFY_RAISES_ASSERT( " #a " ) with exceptions disabled\n"; #endif -#ifndef VERIFY_RAISES_STATIC_ASSERT - #define VERIFY_RAISES_STATIC_ASSERT(a) \ - std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n"; -#endif #if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(SYCL_DEVICE_ONLY) #define EIGEN_USE_CUSTOM_ASSERT @@ -352,12 +338,11 @@ namespace Eigen #else // EIGEN_NO_ASSERTION_CHECKING #define VERIFY_RAISES_ASSERT(a) {} - #define VERIFY_RAISES_STATIC_ASSERT(a) {} #endif // EIGEN_NO_ASSERTION_CHECKING #define EIGEN_INTERNAL_DEBUGGING -#include // required for createRandomPIMatrixOfRank +#include // required for createRandomPIMatrixOfRank and generateRandomMatrixSvs inline void verify_impl(bool condition, const char *testname, const char *file, int line, const char *condition_as_string) { @@ -391,6 +376,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file, #define VERIFY_IS_NOT_MUCH_SMALLER_THAN(a, b) VERIFY(!test_isMuchSmallerThan(a, b)) #define VERIFY_IS_APPROX_OR_LESS_THAN(a, b) VERIFY(test_isApproxOrLessThan(a, b)) #define 
VERIFY_IS_NOT_APPROX_OR_LESS_THAN(a, b) VERIFY(!test_isApproxOrLessThan(a, b)) +#define VERIFY_IS_CWISE_EQUAL(a, b) VERIFY(verifyIsCwiseApprox(a, b, true)) +#define VERIFY_IS_CWISE_APPROX(a, b) VERIFY(verifyIsCwiseApprox(a, b, false)) #define VERIFY_IS_UNITARY(a) VERIFY(test_isUnitary(a)) @@ -403,10 +390,25 @@ inline void verify_impl(bool condition, const char *testname, const char *file, } while (0) +// Forward declarations to avoid ICC warnings +#if EIGEN_COMP_ICC + +template std::string type_name(); + +namespace Eigen { + +template +bool test_is_equal(const T& actual, const U& expected, bool expect_equal=true); + +} // end namespace Eigen + +#endif // EIGEN_COMP_ICC + + namespace Eigen { template -typename internal::enable_if::value,bool>::type +std::enable_if_t::value,bool> is_same_type(const T1&, const T2&) { return true; @@ -422,7 +424,13 @@ template<> inline long double test_precision >() { ret #define EIGEN_TEST_SCALAR_TEST_OVERLOAD(TYPE) \ inline bool test_isApprox(TYPE a, TYPE b) \ - { return internal::isApprox(a, b, test_precision()); } \ + { return numext::equal_strict(a, b) || \ + ((numext::isnan)(a) && (numext::isnan)(b)) || \ + (internal::isApprox(a, b, test_precision())); } \ + inline bool test_isCwiseApprox(TYPE a, TYPE b, bool exact) \ + { return numext::equal_strict(a, b) || \ + ((numext::isnan)(a) && (numext::isnan)(b)) || \ + (!exact && internal::isApprox(a, b, test_precision())); } \ inline bool test_isMuchSmallerThan(TYPE a, TYPE b) \ { return internal::isMuchSmallerThan(a, b, test_precision()); } \ inline bool test_isApproxOrLessThan(TYPE a, TYPE b) \ @@ -434,10 +442,8 @@ EIGEN_TEST_SCALAR_TEST_OVERLOAD(int) EIGEN_TEST_SCALAR_TEST_OVERLOAD(unsigned int) EIGEN_TEST_SCALAR_TEST_OVERLOAD(long) EIGEN_TEST_SCALAR_TEST_OVERLOAD(unsigned long) -#if EIGEN_HAS_CXX11 EIGEN_TEST_SCALAR_TEST_OVERLOAD(long long) EIGEN_TEST_SCALAR_TEST_OVERLOAD(unsigned long long) -#endif EIGEN_TEST_SCALAR_TEST_OVERLOAD(float) EIGEN_TEST_SCALAR_TEST_OVERLOAD(double) EIGEN_TEST_SCALAR_TEST_OVERLOAD(half) @@ -543,7 +549,7 @@ typename T1::RealScalar test_relative_error(const SparseMatrixBase &a, const } template -typename NumTraits::Real>::NonInteger test_relative_error(const T1 &a, const T2 &b, typename internal::enable_if::Real>::value, T1>::type* = 0) +typename NumTraits::Real>::NonInteger test_relative_error(const T1 &a, const T2 &b, std::enable_if_t::Real>::value, T1>* = 0) { typedef typename NumTraits::Real>::NonInteger RealScalar; return numext::sqrt(RealScalar(numext::abs2(a-b))/(numext::mini)(RealScalar(numext::abs2(a)),RealScalar(numext::abs2(b)))); @@ -575,7 +581,7 @@ typename NumTraits::Real get_test_precision(const T&, const } template -typename NumTraits::Real get_test_precision(const T&,typename internal::enable_if::Real>::value, T>::type* = 0) +typename NumTraits::Real get_test_precision(const T&,std::enable_if_t::Real>::value, T>* = 0) { return test_precision::Real>(); } @@ -592,6 +598,22 @@ inline bool verifyIsApprox(const Type1& a, const Type2& b) return ret; } +// verifyIsCwiseApprox is a wrapper to test_isCwiseApprox that outputs the relative difference magnitude if the test fails. 
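The two new macros differ from VERIFY_IS_APPROX in two ways: the comparison is coefficient-wise, and a NaN is accepted wherever the other operand has a NaN in the same position (plain isApprox() returns false as soon as any NaN is involved). A usage sketch with made-up values, relying on the harness's float tolerance test_precision<float>():

    Eigen::MatrixXf a(1, 3), b(1, 3);
    const float nan = std::numeric_limits<float>::quiet_NaN();
    a << 1.0f, nan, 3.0f;
    b << 1.0f, nan, 3.0003f;

    VERIFY_IS_CWISE_APPROX(a, b);    // passes: NaNs line up, 3.0f vs 3.0003f is within tolerance
    // VERIFY_IS_CWISE_EQUAL(a, b);  // would fail: the last coefficient is not an exact match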
+template +inline bool verifyIsCwiseApprox(const Type1& a, const Type2& b, bool exact) +{ + bool ret = test_isCwiseApprox(a,b,exact); + if(!ret) { + if (exact) { + std::cerr << "Values are not an exact match"; + } else { + std::cerr << "Difference too large wrt tolerance " << get_test_precision(a); + } + std::cerr << ", relative error is: " << test_relative_error(a,b) << std::endl; + } + return ret; +} + // The idea behind this function is to compare the two scalars a and b where // the scalar ref is a hint about the expected order of magnitude of a and b. // WARNING: the scalar a and b must be positive @@ -625,14 +647,39 @@ inline bool test_isUnitary(const MatrixBase& m) return m.isUnitary(test_precision::Scalar>()); } -// Forward declaration to avoid ICC warning -template -bool test_is_equal(const T& actual, const U& expected, bool expect_equal=true); +// Checks component-wise, works with infs and nans. +template +bool test_isCwiseApprox(const DenseBase& m1, + const DenseBase& m2, + bool exact) { + if (m1.rows() != m2.rows()) { + return false; + } + if (m1.cols() != m2.cols()) { + return false; + } + for (Index r = 0; r < m1.rows(); ++r) { + for (Index c = 0; c < m1.cols(); ++c) { + if (m1(r, c) != m2(r, c) + && !((numext::isnan)(m1(r, c)) && (numext::isnan)(m2(r, c))) + && (exact || !test_isApprox(m1(r, c), m2(r, c)))) { + return false; + } + } + } + return true; +} + +template +bool test_isCwiseApprox(const SparseMatrixBase& m1, + const SparseMatrixBase& m2, bool exact) { + return test_isCwiseApprox(m1.toDense(), m2.toDense(), exact); +} template bool test_is_equal(const T& actual, const U& expected, bool expect_equal) { - if ((actual==expected) == expect_equal) + if (numext::equal_strict(actual, expected) == expect_equal) return true; // false: std::cerr @@ -641,80 +688,39 @@ bool test_is_equal(const T& actual, const U& expected, bool expect_equal) return false; } -/** Creates a random Partial Isometry matrix of given rank. - * - * A partial isometry is a matrix all of whose singular values are either 0 or 1. - * This is very useful to test rank-revealing algorithms. 
- */ -// Forward declaration to avoid ICC warning -template -void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m); -template -void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m) -{ - typedef typename internal::traits::Scalar Scalar; - enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; - typedef Matrix VectorType; - typedef Matrix MatrixAType; - typedef Matrix MatrixBType; - - if(desired_rank == 0) - { - m.setZero(rows,cols); - return; - } - - if(desired_rank == 1) - { - // here we normalize the vectors to get a partial isometry - m = VectorType::Random(rows).normalized() * VectorType::Random(cols).normalized().transpose(); - return; - } - - MatrixAType a = MatrixAType::Random(rows,rows); - MatrixType d = MatrixType::Identity(rows,cols); - MatrixBType b = MatrixBType::Random(cols,cols); - - // set the diagonal such that only desired_rank non-zero entries reamain - const Index diag_size = (std::min)(d.rows(),d.cols()); - if(diag_size != desired_rank) - d.diagonal().segment(desired_rank, diag_size-desired_rank) = VectorType::Zero(diag_size-desired_rank); - - HouseholderQR qra(a); - HouseholderQR qrb(b); - m = qra.householderQ() * d * qrb.householderQ(); -} - -// Forward declaration to avoid ICC warning -template -void randomPermutationVector(PermutationVectorType& v, Index size); -template -void randomPermutationVector(PermutationVectorType& v, Index size) -{ - typedef typename PermutationVectorType::Scalar Scalar; - v.resize(size); - for(Index i = 0; i < size; ++i) v(i) = Scalar(i); - if(size == 1) return; - for(Index n = 0; n < 3 * size; ++n) - { - Index i = internal::random(0, size-1); - Index j; - do j = internal::random(0, size-1); while(j==i); - std::swap(v(i), v(j)); - } -} +/** + * Check if number is "not a number" (NaN). + * + * @tparam T input type + * @param x input value + * @return true, if input value is "not a number" (NaN) + */ template bool isNotNaN(const T& x) { return x==x; } +/** + * Check if number is plus infinity. + * + * @tparam T input type + * @param x input value + * @return true, if input value is plus infinity + */ template bool isPlusInf(const T& x) { return x > NumTraits::highest(); } +/** + * Check if number is minus infinity. + * + * @tparam T input type + * @param x input value + * @return true, if input value is minus infinity + */ template bool isMinusInf(const T& x) { return x < NumTraits::lowest(); @@ -722,6 +728,10 @@ template bool isMinusInf(const T& x) } // end namespace Eigen + +#include "random_matrix_helper.h" + + template struct GetDifferentType; template<> struct GetDifferentType { typedef double type; }; @@ -729,8 +739,6 @@ template<> struct GetDifferentType { typedef float type; }; template struct GetDifferentType > { typedef std::complex::type> type; }; -// Forward declaration to avoid ICC warning -template std::string type_name(); template std::string type_name() { return "other"; } template<> std::string type_name() { return "float"; } template<> std::string type_name() { return "double"; } @@ -743,6 +751,11 @@ template<> std::string type_name >() { return "comple using namespace Eigen; +/** + * Set number of repetitions for unit test from input string. + * + * @param str input string + */ inline void set_repeat_from_string(const char *str) { errno = 0; @@ -755,6 +768,11 @@ inline void set_repeat_from_string(const char *str) g_has_set_repeat = true; } +/** + * Set seed for randomized unit tests from input string. 
+ * + * @param str input string + */ inline void set_seed_from_string(const char *str) { errno = 0; @@ -855,3 +873,5 @@ int main(int argc, char *argv[]) // 4503 - decorated name length exceeded, name was truncated #pragma warning( disable : 4503) #endif + +#include "gpu_test_helper.h" diff --git a/libs/eigen/test/mapped_matrix.cpp b/libs/eigen/test/mapped_matrix.cpp index 0ea136a..1dd6959 100644 --- a/libs/eigen/test/mapped_matrix.cpp +++ b/libs/eigen/test/mapped_matrix.cpp @@ -7,10 +7,6 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_NO_STATIC_ASSERT -#define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them -#endif - #include "main.h" #define EIGEN_TESTMAP_MAX_SIZE 256 @@ -24,7 +20,9 @@ template void map_class_vector(const VectorType& m) Scalar* array1 = internal::aligned_new(size); Scalar* array2 = internal::aligned_new(size); Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = (internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? array3+1 : array3; + // In case of no alignment, avoid division by zero. + constexpr int alignment = (std::max)(EIGEN_MAX_ALIGN_BYTES, 1); + Scalar* array3unaligned = (internal::UIntPtr(array3)%alignment) == 0 ? array3+1 : array3; Scalar array4[EIGEN_TESTMAP_MAX_SIZE]; Map(array1, size) = VectorType::Random(size); @@ -64,7 +62,9 @@ template void map_class_matrix(const MatrixType& m) Scalar* array3 = new Scalar[size+1]; Index sizep1 = size + 1; // <- without this temporary MSVC 2103 generates bad code for(Index i = 0; i < sizep1; i++) array3[i] = Scalar(1); - Scalar* array3unaligned = (internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? array3+1 : array3; + // In case of no alignment, avoid division by zero. + constexpr int alignment = (std::max)(EIGEN_MAX_ALIGN_BYTES, 1); + Scalar* array3unaligned = (internal::UIntPtr(array3)%alignment) == 0 ? array3+1 : array3; Scalar array4[256]; if(size<=256) for(int i = 0; i < size; i++) array4[i] = Scalar(1); @@ -127,7 +127,9 @@ template void map_static_methods(const VectorType& m) Scalar* array1 = internal::aligned_new(size); Scalar* array2 = internal::aligned_new(size); Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; + // In case of no alignment, avoid division by zero. + constexpr int alignment = (std::max)(EIGEN_MAX_ALIGN_BYTES, 1); + Scalar* array3unaligned = (internal::UIntPtr(array3)%alignment) == 0 ? array3+1 : array3; VectorType::MapAligned(array1, size) = VectorType::Random(size); VectorType::Map(array2, size) = VectorType::Map(array1, size); @@ -150,7 +152,7 @@ template void check_const_correctness(const PlainObjec // CMake can help with that. 
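The recurring alignment guard below exists because EIGEN_MAX_ALIGN_BYTES can legitimately be 0 (alignment disabled), and a modulo by zero is undefined behavior. Clamping the divisor to 1 keeps the check well defined and simply classifies every pointer as aligned in that configuration; a condensed sketch of the idiom:

    float buffer[4];
    const float* ptr = buffer;
    // (std::max) is parenthesized on purpose: main.h defines min/max macros to
    // catch unprotected calls, so a bare std::max would be macro-expanded.
    constexpr int alignment = (std::max)(EIGEN_MAX_ALIGN_BYTES, 1);
    const bool isAligned = (Eigen::internal::UIntPtr(ptr) % alignment) == 0;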
// verify that map-to-const don't have LvalueBit - typedef typename internal::add_const::type ConstPlainObjectType; + typedef std::add_const_t ConstPlainObjectType; VERIFY( !(internal::traits >::Flags & LvalueBit) ); VERIFY( !(internal::traits >::Flags & LvalueBit) ); VERIFY( !(Map::Flags & LvalueBit) ); diff --git a/libs/eigen/test/mapstride.cpp b/libs/eigen/test/mapstride.cpp index fde73f2..42ceb0c 100644 --- a/libs/eigen/test/mapstride.cpp +++ b/libs/eigen/test/mapstride.cpp @@ -29,8 +29,8 @@ template void map_class_vector(const VectorTy map = v; for(int i = 0; i < size; ++i) { - VERIFY(array[3*i] == v[i]); - VERIFY(map[i] == v[i]); + VERIFY_IS_EQUAL(array[3*i], v[i]); + VERIFY_IS_EQUAL(map[i], v[i]); } } @@ -39,8 +39,8 @@ template void map_class_vector(const VectorTy map = v; for(int i = 0; i < size; ++i) { - VERIFY(array[2*i] == v[i]); - VERIFY(map[i] == v[i]); + VERIFY_IS_EQUAL(array[2*i], v[i]); + VERIFY_IS_EQUAL(map[i], v[i]); } } @@ -65,10 +65,13 @@ template void map_class_matrix(const MatrixTy Scalar a_array2[256]; Scalar* array2 = a_array2; - if(Alignment!=Aligned) + if(Alignment!=Aligned) { array2 = (Scalar*)(internal::IntPtr(a_array2) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); - else - array2 = (Scalar*)(((internal::UIntPtr(a_array2)+EIGEN_MAX_ALIGN_BYTES-1)/EIGEN_MAX_ALIGN_BYTES)*EIGEN_MAX_ALIGN_BYTES); + } else { + // In case there is no alignment, default to pointing to the start. + constexpr int alignment = (std::max)(EIGEN_MAX_ALIGN_BYTES, 1); + array2 = (Scalar*)(((internal::UIntPtr(a_array2)+alignment-1)/alignment)*alignment); + } Index maxsize2 = a_array2 - array2 + 256; // test no inner stride and some dynamic outer stride @@ -84,8 +87,8 @@ template void map_class_matrix(const MatrixTy for(int i = 0; i < m.outerSize(); ++i) for(int j = 0; j < m.innerSize(); ++j) { - VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); - VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + VERIFY_IS_EQUAL(array[map.outerStride()*i+j], m.coeffByOuterInner(i,j)); + VERIFY_IS_EQUAL(map.coeffByOuterInner(i,j), m.coeffByOuterInner(i,j)); } VERIFY_IS_APPROX(s1*map,s1*m); map *= s1; @@ -111,8 +114,8 @@ template void map_class_matrix(const MatrixTy for(int i = 0; i < m.outerSize(); ++i) for(int j = 0; j < m.innerSize(); ++j) { - VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); - VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + VERIFY_IS_EQUAL(array[map.outerStride()*i+j], m.coeffByOuterInner(i,j)); + VERIFY_IS_EQUAL(map.coeffByOuterInner(i,j), m.coeffByOuterInner(i,j)); } VERIFY_IS_APPROX(s1*map,s1*m); map *= s1; @@ -133,8 +136,8 @@ template void map_class_matrix(const MatrixTy for(int i = 0; i < m.outerSize(); ++i) for(int j = 0; j < m.innerSize(); ++j) { - VERIFY(array[map.outerStride()*i+map.innerStride()*j] == m.coeffByOuterInner(i,j)); - VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + VERIFY_IS_EQUAL(array[map.outerStride()*i+map.innerStride()*j], m.coeffByOuterInner(i,j)); + VERIFY_IS_EQUAL(map.coeffByOuterInner(i,j), m.coeffByOuterInner(i,j)); } VERIFY_IS_APPROX(s1*map,s1*m); map *= s1; @@ -154,8 +157,8 @@ template void map_class_matrix(const MatrixTy for(int i = 0; i < m.outerSize(); ++i) for(int j = 0; j < m.innerSize(); ++j) { - VERIFY(array[map.innerSize()*i*2+j*2] == m.coeffByOuterInner(i,j)); - VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + VERIFY_IS_EQUAL(array[map.innerSize()*i*2+j*2], m.coeffByOuterInner(i,j)); + 
VERIFY_IS_EQUAL(map.coeffByOuterInner(i,j), m.coeffByOuterInner(i,j)); } VERIFY_IS_APPROX(s1*map,s1*m); map *= s1; diff --git a/libs/eigen/test/meta.cpp b/libs/eigen/test/meta.cpp index 7a8b93c..cac7af1 100644 --- a/libs/eigen/test/meta.cpp +++ b/libs/eigen/test/meta.cpp @@ -29,47 +29,28 @@ struct MyImpl : public MyInterface { EIGEN_DECLARE_TEST(meta) { - VERIFY((internal::conditional<(3<4),internal::true_type, internal::false_type>::type::value)); VERIFY(( internal::is_same::value)); VERIFY((!internal::is_same::value)); VERIFY((!internal::is_same::value)); VERIFY((!internal::is_same::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - - // test add_const - VERIFY(( internal::is_same< internal::add_const::type, const float >::value)); - VERIFY(( internal::is_same< internal::add_const::type, float* const>::value)); - VERIFY(( internal::is_same< internal::add_const::type, float const* const>::value)); - VERIFY(( internal::is_same< internal::add_const::type, float& >::value)); - - // test remove_const - VERIFY(( internal::is_same< internal::remove_const::type, float const* >::value)); - VERIFY(( internal::is_same< internal::remove_const::type, float const* >::value)); - VERIFY(( internal::is_same< internal::remove_const::type, float* >::value)); + VERIFY(( internal::is_same >::value)); + VERIFY(( internal::is_same >::value)); + VERIFY(( internal::is_same >::value)); + VERIFY(( internal::is_same >::value)); + VERIFY(( internal::is_same >::value)); + VERIFY(( internal::is_same >::value)); + VERIFY(( internal::is_same >::value)); // test add_const_on_value_type - VERIFY(( internal::is_same< internal::add_const_on_value_type::type, float const& >::value)); - VERIFY(( internal::is_same< internal::add_const_on_value_type::type, float const* >::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type_t, float const& >::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type_t, float const* >::value)); - VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float >::value)); - VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float >::value)); - - VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float* const>::value)); - VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float* const>::value)); - - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); - VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type_t, const float >::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type_t, const float >::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type_t, const float* const>::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type_t, const float* const>::value)); // is_convertible STATIC_CHECK(( internal::is_convertible::value )); @@ -114,13 +95,7 @@ EIGEN_DECLARE_TEST(meta) // So the following tests are expected to fail with recent compilers. 
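Context for the guard removed below: GCC before 4.8 produced a hard error for is_convertible involving an abstract class ("cannot allocate an object of abstract type"), i.e. it did not obey SFINAE, whereas a conforming trait simply reports false. A standalone sketch of the property now assumed unconditionally (the type is hypothetical):

    struct AbstractBase { virtual void f() = 0; };

    // Returning an abstract type by value is ill-formed, so the conversion is
    // reported as impossible; the trait itself still compiles cleanly.
    static_assert(!std::is_convertible<AbstractBase, AbstractBase>::value,
                  "abstract by-value target");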
STATIC_CHECK(( !internal::is_convertible::value )); - #if (!EIGEN_COMP_GNUC_STRICT) || (EIGEN_GNUC_AT_LEAST(4,8)) - // GCC prior to 4.8 fails to compile this test: - // error: cannot allocate an object of abstract type 'MyInterface' - // In other word, it does not obey SFINAE. - // Nevertheless, we don't really care about supporting abstract type as scalar type! STATIC_CHECK(( !internal::is_convertible::value )); - #endif STATIC_CHECK(( internal::is_convertible::value )); #endif diff --git a/libs/eigen/test/mixingtypes.cpp b/libs/eigen/test/mixingtypes.cpp index d450dbf..fe760b7 100644 --- a/libs/eigen/test/mixingtypes.cpp +++ b/libs/eigen/test/mixingtypes.cpp @@ -10,10 +10,6 @@ #if defined(EIGEN_TEST_PART_7) -#ifndef EIGEN_NO_STATIC_ASSERT -#define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them -#endif - // ignore double-promotion diagnostic for clang and gcc, if we check for static assertion anyway: // TODO do the same for MSVC? #if defined(__clang__) @@ -49,28 +45,6 @@ using namespace std; VERIFY_IS_APPROX(XPR,REF); \ VERIFY( g_called && #XPR" not properly optimized"); -template -void raise_assertion(Index size = SizeAtCompileType) -{ - // VERIFY_RAISES_ASSERT(mf+md); // does not even compile - Matrix vf; vf.setRandom(size); - Matrix vd; vd.setRandom(size); - VERIFY_RAISES_ASSERT(vf=vd); - VERIFY_RAISES_ASSERT(vf+=vd); - VERIFY_RAISES_ASSERT(vf-=vd); - VERIFY_RAISES_ASSERT(vd=vf); - VERIFY_RAISES_ASSERT(vd+=vf); - VERIFY_RAISES_ASSERT(vd-=vf); - - // vd.asDiagonal() * mf; // does not even compile - // vcd.asDiagonal() * mf; // does not even compile - -#if 0 // we get other compilation errors here than just static asserts - VERIFY_RAISES_ASSERT(vd.dot(vf)); -#endif -} - - template void mixingtypes(int size = SizeAtCompileType) { typedef std::complex CF; @@ -139,11 +113,12 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast >().array()); // check scalar powers - VERIFY_MIX_SCALAR( pow(vcf.array(), sf), Eigen::pow(vcf.array(), complex(sf)) ); - VERIFY_MIX_SCALAR( vcf.array().pow(sf) , Eigen::pow(vcf.array(), complex(sf)) ); + // NOTE: scalar exponents use a unary op. 
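The switch from VERIFY_MIX_SCALAR to VERIFY_IS_APPROX on the following lines follows from that note: VERIFY_MIX_SCALAR asserts that a mixed-scalar binary functor actually ran (via the g_called flag), but a scalar exponent is folded into a unary power functor here, so the flag never fires even though the result is correct. Equivalence sketch (illustrative, outside the harness):

    Eigen::ArrayXcf a = Eigen::ArrayXcf::Random(4);
    const float s = 2.0f;

    // Same coefficients, different expression trees: the left-hand side binds
    // 's' inside a unary functor, the right-hand side is the complex-scalar form.
    const bool same = Eigen::pow(a, s).isApprox(Eigen::pow(a, std::complex<float>(s)));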
+ VERIFY_IS_APPROX( pow(vcf.array(), sf), Eigen::pow(vcf.array(), complex(sf)) ); + VERIFY_IS_APPROX( vcf.array().pow(sf) , Eigen::pow(vcf.array(), complex(sf)) ); VERIFY_MIX_SCALAR( pow(sd, vcd.array()), Eigen::pow(complex(sd), vcd.array()) ); - VERIFY_MIX_SCALAR( Eigen::pow(vf.array(), scf), Eigen::pow(vf.template cast >().array(), scf) ); - VERIFY_MIX_SCALAR( vf.array().pow(scf) , Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_IS_APPROX( Eigen::pow(vf.array(), scf), Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_IS_APPROX( vf.array().pow(scf) , Eigen::pow(vf.template cast >().array(), scf) ); VERIFY_MIX_SCALAR( Eigen::pow(scd, vd.array()), Eigen::pow(scd, vd.template cast >().array()) ); // check dot product @@ -320,10 +295,5 @@ EIGEN_DECLARE_TEST(mixingtypes) CALL_SUBTEST_4(mixingtypes<3>()); CALL_SUBTEST_5(mixingtypes<4>()); CALL_SUBTEST_6(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); - CALL_SUBTEST_7(raise_assertion(internal::random(1,EIGEN_TEST_MAX_SIZE))); } - CALL_SUBTEST_7(raise_assertion<0>()); - CALL_SUBTEST_7(raise_assertion<3>()); - CALL_SUBTEST_7(raise_assertion<4>()); - CALL_SUBTEST_7(raise_assertion(0)); } diff --git a/libs/eigen/test/nestbyvalue.cpp b/libs/eigen/test/nestbyvalue.cpp index c5356bc..c25f0bf 100644 --- a/libs/eigen/test/nestbyvalue.cpp +++ b/libs/eigen/test/nestbyvalue.cpp @@ -26,12 +26,14 @@ EIGEN_DECLARE_TEST(nestbyvalue) for(int i = 0; i < g_repeat; i++) { Index rows = internal::random(1,EIGEN_TEST_MAX_SIZE); Index cols = internal::random(1,EIGEN_TEST_MAX_SIZE); - MatrixXd a = MatrixXd(rows,cols); + MatrixXd a = MatrixXd::Random(rows,cols); nb_temporaries = 0; XprType x = get_xpr_with_temps(a); VERIFY_IS_EQUAL(nb_temporaries,6); MatrixXd b = x; VERIFY_IS_EQUAL(nb_temporaries,6+1); VERIFY_IS_APPROX(b, a.rowwise().reverse().eval() + (a+a).eval()); + // Block expressions work with dense NestByValue. 
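The added check exercises NestByValue in a dense expression: nestByValue() wraps the object so that the enclosing expression stores its own copy rather than a reference, the use case being expressions that must stay valid after the object they were built from goes away. A minimal sketch mirroring the test line (hypothetical usage):

    Eigen::MatrixXd a = Eigen::MatrixXd::Random(3, 3);

    // Each NestByValue wrapper holds a copy of 'a' inside the expression tree.
    Eigen::MatrixXd b = a.nestByValue().rowwise().reverse()
                      + (a.nestByValue() + a.nestByValue());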
+ VERIFY_IS_APPROX(b, a.nestByValue().rowwise().reverse().eval() + (a.nestByValue()+a.nestByValue()).eval()); } } diff --git a/libs/eigen/test/nesting_ops.cpp b/libs/eigen/test/nesting_ops.cpp index 4b5fc21..1350994 100644 --- a/libs/eigen/test/nesting_ops.cpp +++ b/libs/eigen/test/nesting_ops.cpp @@ -27,7 +27,7 @@ template bool verify_eval_type(const XprType &, const ReferenceType&) { typedef typename internal::nested_eval::type EvalType; - return internal::is_same::type, typename internal::remove_all::type>::value; + return internal::is_same, internal::remove_all_t>::value; } template void run_nesting_ops_1(const MatrixType& _m) diff --git a/libs/eigen/test/nomalloc.cpp b/libs/eigen/test/nomalloc.cpp index cb4c073..689a4cc 100644 --- a/libs/eigen/test/nomalloc.cpp +++ b/libs/eigen/test/nomalloc.cpp @@ -152,7 +152,7 @@ void ctms_decompositions() x = fpQR.solve(b); // SVD module - Eigen::JacobiSVD jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV); + Eigen::JacobiSVD jSVD; jSVD.compute(A); } void test_zerosized() { diff --git a/libs/eigen/test/nullary.cpp b/libs/eigen/test/nullary.cpp index 9b25ea4..e524837 100644 --- a/libs/eigen/test/nullary.cpp +++ b/libs/eigen/test/nullary.cpp @@ -13,24 +13,20 @@ template bool equalsIdentity(const MatrixType& A) { - typedef typename MatrixType::Scalar Scalar; - Scalar zero = static_cast(0); - bool offDiagOK = true; for (Index i = 0; i < A.rows(); ++i) { for (Index j = i+1; j < A.cols(); ++j) { - offDiagOK = offDiagOK && (A(i,j) == zero); + offDiagOK = offDiagOK && numext::is_exactly_zero(A(i, j)); } } for (Index i = 0; i < A.rows(); ++i) { for (Index j = 0; j < (std::min)(i, A.cols()); ++j) { - offDiagOK = offDiagOK && (A(i,j) == zero); + offDiagOK = offDiagOK && numext::is_exactly_zero(A(i, j)); } } bool diagOK = (A.diagonal().array() == 1).all(); return offDiagOK && diagOK; - } template @@ -82,8 +78,9 @@ void testVectorType(const VectorType& base) const Scalar step = ((size == 1) ? 1 : (high-low)/RealScalar(size-1)); // check whether the result yields what we expect it to do - VectorType m(base); + VectorType m(base), o(base); m.setLinSpaced(size,low,high); + o.setEqualSpaced(size, low, step); if(!NumTraits::IsInteger) { @@ -91,6 +88,7 @@ void testVectorType(const VectorType& base) for (int i=0; i diff --git a/libs/eigen/test/num_dimensions.cpp b/libs/eigen/test/num_dimensions.cpp index 7ad7ef6..528c8f6 100644 --- a/libs/eigen/test/num_dimensions.cpp +++ b/libs/eigen/test/num_dimensions.cpp @@ -15,7 +15,6 @@ void check_dim(const Xpr& ) { STATIC_CHECK( Xpr::NumDimensions == ExpectedDim ); } -#if EIGEN_HAS_CXX11 template