ADD: new track message, Entity class and Position class

Henry Winkel
2022-12-20 17:20:35 +01:00
parent 469ecfb099
commit 98ebb563a8
2114 changed files with 482360 additions and 24 deletions

View File

@@ -0,0 +1,222 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Ilya Baran <ibaran@mit.edu>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/StdVector>
#include <Eigen/Geometry>
#include <unsupported/Eigen/BVH>
namespace Eigen {
template<typename Scalar, int Dim> AlignedBox<Scalar, Dim> bounding_box(const Matrix<Scalar, Dim, 1> &v) { return AlignedBox<Scalar, Dim>(v); }
}
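// Note: KdBVH locates each object's volume through an unqualified call to
// bounding_box(), resolved via argument-dependent lookup; the overload above
// lets a plain point act as its own zero-sized AlignedBox.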
template<int Dim>
struct Ball
{
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(double, Dim)
typedef Matrix<double, Dim, 1> VectorType;
Ball() {}
Ball(const VectorType &c, double r) : center(c), radius(r) {}
VectorType center;
double radius;
};
template<int Dim> AlignedBox<double, Dim> bounding_box(const Ball<Dim> &b)
{ return AlignedBox<double, Dim>(b.center.array() - b.radius, b.center.array() + b.radius); }
inline double SQR(double x) { return x * x; }
template<int Dim>
struct BallPointStuff //this class provides functions to act as both an intersector and a minimizer, for ball-and-point queries and for queries between two trees
{
typedef double Scalar;
typedef Matrix<double, Dim, 1> VectorType;
typedef Ball<Dim> BallType;
typedef AlignedBox<double, Dim> BoxType;
BallPointStuff() : calls(0), count(0) {}
BallPointStuff(const VectorType &inP) : p(inP), calls(0), count(0) {}
bool intersectVolume(const BoxType &r) { ++calls; return r.contains(p); }
bool intersectObject(const BallType &b) {
++calls;
if((b.center - p).squaredNorm() < SQR(b.radius))
++count;
return false; //continue
}
bool intersectVolumeVolume(const BoxType &r1, const BoxType &r2) { ++calls; return !(r1.intersection(r2)).isNull(); }
bool intersectVolumeObject(const BoxType &r, const BallType &b) { ++calls; return r.squaredExteriorDistance(b.center) < SQR(b.radius); }
bool intersectObjectVolume(const BallType &b, const BoxType &r) { ++calls; return r.squaredExteriorDistance(b.center) < SQR(b.radius); }
bool intersectObjectObject(const BallType &b1, const BallType &b2){
++calls;
if((b1.center - b2.center).norm() < b1.radius + b2.radius)
++count;
return false;
}
bool intersectVolumeObject(const BoxType &r, const VectorType &v) { ++calls; return r.contains(v); }
bool intersectObjectObject(const BallType &b, const VectorType &v){
++calls;
if((b.center - v).squaredNorm() < SQR(b.radius))
++count;
return false;
}
double minimumOnVolume(const BoxType &r) { ++calls; return r.squaredExteriorDistance(p); }
double minimumOnObject(const BallType &b) { ++calls; return (std::max)(0., (b.center - p).squaredNorm() - SQR(b.radius)); }
double minimumOnVolumeVolume(const BoxType &r1, const BoxType &r2) { ++calls; return r1.squaredExteriorDistance(r2); }
double minimumOnVolumeObject(const BoxType &r, const BallType &b) { ++calls; return SQR((std::max)(0., r.exteriorDistance(b.center) - b.radius)); }
double minimumOnObjectVolume(const BallType &b, const BoxType &r) { ++calls; return SQR((std::max)(0., r.exteriorDistance(b.center) - b.radius)); }
double minimumOnObjectObject(const BallType &b1, const BallType &b2){ ++calls; return SQR((std::max)(0., (b1.center - b2.center).norm() - b1.radius - b2.radius)); }
double minimumOnVolumeObject(const BoxType &r, const VectorType &v) { ++calls; return r.squaredExteriorDistance(v); }
double minimumOnObjectObject(const BallType &b, const VectorType &v){ ++calls; return SQR((std::max)(0., (b.center - v).norm() - b.radius)); }
VectorType p;
int calls;
int count;
};
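// Return-value protocol of the BVH module: for BVIntersect, intersectVolume()
// returning true means "descend into this node" and intersectObject() returning
// true means "terminate the whole query", which is why the counters above
// return false to visit every candidate. For BVMinimize, the minimumOn*()
// functions must return lower bounds on the true minimum, which the traversal
// uses to prune subtrees.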
template<int Dim>
struct TreeTest
{
typedef Matrix<double, Dim, 1> VectorType;
typedef std::vector<VectorType, aligned_allocator<VectorType> > VectorTypeList;
typedef Ball<Dim> BallType;
typedef std::vector<BallType, aligned_allocator<BallType> > BallTypeList;
typedef AlignedBox<double, Dim> BoxType;
void testIntersect1()
{
BallTypeList b;
for(int i = 0; i < 500; ++i) {
b.push_back(BallType(VectorType::Random(), 0.5 * internal::random(0., 1.)));
}
KdBVH<double, Dim, BallType> tree(b.begin(), b.end());
VectorType pt = VectorType::Random();
BallPointStuff<Dim> i1(pt), i2(pt);
for(int i = 0; i < (int)b.size(); ++i)
i1.intersectObject(b[i]);
BVIntersect(tree, i2);
VERIFY(i1.count == i2.count);
}
void testMinimize1()
{
BallTypeList b;
for(int i = 0; i < 500; ++i) {
b.push_back(BallType(VectorType::Random(), 0.01 * internal::random(0., 1.)));
}
KdBVH<double, Dim, BallType> tree(b.begin(), b.end());
VectorType pt = VectorType::Random();
BallPointStuff<Dim> i1(pt), i2(pt);
double m1 = (std::numeric_limits<double>::max)(), m2 = m1;
for(int i = 0; i < (int)b.size(); ++i)
m1 = (std::min)(m1, i1.minimumOnObject(b[i]));
m2 = BVMinimize(tree, i2);
VERIFY_IS_APPROX(m1, m2);
}
void testIntersect2()
{
BallTypeList b;
VectorTypeList v;
for(int i = 0; i < 50; ++i) {
b.push_back(BallType(VectorType::Random(), 0.5 * internal::random(0., 1.)));
for(int j = 0; j < 3; ++j)
v.push_back(VectorType::Random());
}
KdBVH<double, Dim, BallType> tree(b.begin(), b.end());
KdBVH<double, Dim, VectorType> vTree(v.begin(), v.end());
BallPointStuff<Dim> i1, i2;
for(int i = 0; i < (int)b.size(); ++i)
for(int j = 0; j < (int)v.size(); ++j)
i1.intersectObjectObject(b[i], v[j]);
BVIntersect(tree, vTree, i2);
VERIFY(i1.count == i2.count);
}
void testMinimize2()
{
BallTypeList b;
VectorTypeList v;
for(int i = 0; i < 50; ++i) {
b.push_back(BallType(VectorType::Random(), 1e-7 + 1e-6 * internal::random(0., 1.)));
for(int j = 0; j < 3; ++j)
v.push_back(VectorType::Random());
}
KdBVH<double, Dim, BallType> tree(b.begin(), b.end());
KdBVH<double, Dim, VectorType> vTree(v.begin(), v.end());
BallPointStuff<Dim> i1, i2;
double m1 = (std::numeric_limits<double>::max)(), m2 = m1;
for(int i = 0; i < (int)b.size(); ++i)
for(int j = 0; j < (int)v.size(); ++j)
m1 = (std::min)(m1, i1.minimumOnObjectObject(b[i], v[j]));
m2 = BVMinimize(tree, vTree, i2);
VERIFY_IS_APPROX(m1, m2);
}
};
EIGEN_DECLARE_TEST(BVH)
{
for(int i = 0; i < g_repeat; i++) {
#ifdef EIGEN_TEST_PART_1
TreeTest<2> test2;
CALL_SUBTEST(test2.testIntersect1());
CALL_SUBTEST(test2.testMinimize1());
CALL_SUBTEST(test2.testIntersect2());
CALL_SUBTEST(test2.testMinimize2());
#endif
#ifdef EIGEN_TEST_PART_2
TreeTest<3> test3;
CALL_SUBTEST(test3.testIntersect1());
CALL_SUBTEST(test3.testMinimize1());
CALL_SUBTEST(test3.testIntersect2());
CALL_SUBTEST(test3.testMinimize2());
#endif
#ifdef EIGEN_TEST_PART_3
TreeTest<4> test4;
CALL_SUBTEST(test4.testIntersect1());
CALL_SUBTEST(test4.testMinimize1());
CALL_SUBTEST(test4.testIntersect2());
CALL_SUBTEST(test4.testMinimize2());
#endif
}
}
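A minimal sketch of the query pattern exercised above, assuming the Ball<3>, bounding_box(), and BallPointStuff<3> definitions from this file are in scope:
int count_balls_containing(const std::vector<Ball<3>, aligned_allocator<Ball<3> > >& balls,
                           const Matrix<double, 3, 1>& query)
{
  KdBVH<double, 3, Ball<3> > tree(balls.begin(), balls.end()); // O(n log n) build
  BallPointStuff<3> visitor(query);
  BVIntersect(tree, visitor); // descends only into boxes that contain 'query'
  return visitor.count;
}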

View File

@@ -0,0 +1,417 @@
# The file split_test_helper.h was generated on the first run;
# it is now included in test/
if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h)
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h)
endif()
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported")
add_custom_target(BuildUnsupported)
include_directories(../../test ../../unsupported ../../Eigen
${CMAKE_CURRENT_BINARY_DIR}/../../test)
find_package (Threads)
find_package(GoogleHash)
if(GoogleHash_FOUND)
add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
include_directories(${GOOGLEHASH_INCLUDES})
ei_add_property(EIGEN_TESTED_BACKENDS "GoogleHash, ")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "GoogleHash, ")
endif()
find_package(Adolc)
if(Adolc_FOUND)
include_directories(${ADOLC_INCLUDES})
ei_add_property(EIGEN_TESTED_BACKENDS "Adolc, ")
if(EIGEN_TEST_CXX11)
ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
else()
message(STATUS "Adolc found, but tests require C++11 mode")
endif()
else()
ei_add_property(EIGEN_MISSING_BACKENDS "Adolc, ")
endif()
# this test seems to never have been successful on x87, so it is considered to contain a FP-related bug.
# see thread: "non-linear optimization test summary"
ei_add_test(NonLinearOptimization)
ei_add_test(NumericalDiff)
ei_add_test(autodiff_scalar)
ei_add_test(autodiff)
ei_add_test(BVH)
ei_add_test(matrix_exponential)
ei_add_test(matrix_function)
ei_add_test(matrix_power)
ei_add_test(matrix_square_root)
ei_add_test(alignedvector3)
ei_add_test(FFT)
ei_add_test(EulerAngles)
find_package(MPREAL)
if(MPREAL_FOUND AND EIGEN_COMPILER_SUPPORT_CPP11)
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
include_directories(${MPREAL_INCLUDES})
ei_add_test(mpreal_support "-std=c++11" "${MPREAL_LIBRARIES}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
endif()
ei_add_test(sparse_extra "" "")
find_package(FFTW)
if(FFTW_FOUND)
ei_add_property(EIGEN_TESTED_BACKENDS "fftw, ")
include_directories( ${FFTW_INCLUDES} )
if(FFTWL_LIB)
ei_add_test(FFTW "-DEIGEN_FFTW_DEFAULT -DEIGEN_HAS_FFTWL" "${FFTW_LIBRARIES}" )
else()
ei_add_test(FFTW "-DEIGEN_FFTW_DEFAULT" "${FFTW_LIBRARIES}" )
endif()
else()
ei_add_property(EIGEN_MISSING_BACKENDS "fftw, ")
endif()
option(EIGEN_TEST_OPENGL "Enable OpenGL support in unit tests" OFF)
if(EIGEN_TEST_OPENGL)
find_package(OpenGL)
find_package(GLUT)
find_package(GLEW)
if(OPENGL_FOUND AND GLUT_FOUND AND GLEW_FOUND)
include_directories(${OPENGL_INCLUDE_DIR} ${GLUT_INCLUDE_DIR} ${GLEW_INCLUDE_DIRS})
ei_add_property(EIGEN_TESTED_BACKENDS "OpenGL, ")
set(EIGEN_GL_LIB ${GLUT_LIBRARIES} ${GLEW_LIBRARIES} ${OPENGL_LIBRARIES})
ei_add_test(openglsupport "" "${EIGEN_GL_LIB}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "OpenGL, ")
endif()
else()
ei_add_property(EIGEN_MISSING_BACKENDS "OpenGL, ")
endif()
ei_add_test(polynomialsolver)
ei_add_test(polynomialutils)
ei_add_test(splines)
ei_add_test(gmres)
ei_add_test(dgmres)
ei_add_test(minres)
ei_add_test(idrs)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
ei_add_test(bessel_functions)
ei_add_test(special_functions)
ei_add_test(special_packetmath "-DEIGEN_FAST_MATH=1")
if(EIGEN_TEST_CXX11)
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
# Forward CMake options as preprocessor definitions
if(EIGEN_SYCL_USE_DEFAULT_SELECTOR)
add_definitions(-DEIGEN_SYCL_USE_DEFAULT_SELECTOR=${EIGEN_SYCL_USE_DEFAULT_SELECTOR})
endif()
if(EIGEN_SYCL_NO_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_NO_LOCAL_MEM=${EIGEN_SYCL_NO_LOCAL_MEM})
endif()
if(EIGEN_SYCL_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_LOCAL_MEM=${EIGEN_SYCL_LOCAL_MEM})
endif()
if(EIGEN_SYCL_MAX_GLOBAL_RANGE)
add_definitions(-DEIGEN_SYCL_MAX_GLOBAL_RANGE=${EIGEN_SYCL_MAX_GLOBAL_RANGE})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM0)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM0=${EIGEN_SYCL_LOCAL_THREAD_DIM0})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM1)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM1=${EIGEN_SYCL_LOCAL_THREAD_DIM1})
endif()
if(EIGEN_SYCL_REG_M)
add_definitions(-DEIGEN_SYCL_REG_M=${EIGEN_SYCL_REG_M})
endif()
if(EIGEN_SYCL_REG_N)
add_definitions(-DEIGEN_SYCL_REG_N=${EIGEN_SYCL_REG_N})
endif()
if(EIGEN_SYCL_USE_PROGRAM_CLASS)
add_definitions(-DEIGEN_SYCL_USE_PROGRAM_CLASS=${EIGEN_SYCL_USE_PROGRAM_CLASS})
endif()
if(EIGEN_SYCL_ASYNC_EXECUTION)
add_definitions(-DEIGEN_SYCL_ASYNC_EXECUTION=${EIGEN_SYCL_ASYNC_EXECUTION})
endif()
if(EIGEN_SYCL_DISABLE_SKINNY)
add_definitions(-DEIGEN_SYCL_DISABLE_SKINNY=${EIGEN_SYCL_DISABLE_SKINNY})
endif()
if(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER)
add_definitions(-DEIGEN_SYCL_DISABLE_DOUBLE_BUFFER=${EIGEN_SYCL_DISABLE_DOUBLE_BUFFER})
endif()
if(EIGEN_SYCL_DISABLE_RANK1)
add_definitions(-DEIGEN_SYCL_DISABLE_RANK1=${EIGEN_SYCL_DISABLE_RANK1})
endif()
if(EIGEN_SYCL_DISABLE_SCALAR)
add_definitions(-DEIGEN_SYCL_DISABLE_SCALAR=${EIGEN_SYCL_DISABLE_SCALAR})
endif()
if(EIGEN_SYCL_DISABLE_GEMV)
add_definitions(-DEIGEN_SYCL_DISABLE_GEMV=${EIGEN_SYCL_DISABLE_GEMV})
endif()
if(EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
add_definitions(-DEIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=${EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION})
endif()
if(EIGEN_SYCL_TRISYCL)
# triSYCL now requires C++17.
set(CMAKE_CXX_STANDARD 17)
else()
if(MSVC)
# Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
# can cause issues with the ComputeCpp device compiler parsing Visual Studio Headers.
set(CMAKE_CXX_STANDARD 14)
list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
else()
set(CMAKE_CXX_STANDARD 11)
list(APPEND COMPUTECPP_USER_FLAGS -Wall)
endif()
# The following flags are not supported by Clang and can cause warnings
# if used with -Werror, so they are removed here.
if(COMPUTECPP_USE_COMPILER_DRIVER)
set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
list(APPEND COMPUTECPP_USER_FLAGS
-DEIGEN_NO_ASSERTION_CHECKING=1
-no-serial-memop
-Xclang
-cl-mad-enable)
endif()
ei_add_test(cxx11_tensor_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_math_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_forced_eval_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_broadcast_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_device_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reduction_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_morphing_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_shuffling_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_padding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_builtins_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_contract_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_concatenation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reverse_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_convolution_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_striding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_chipping_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_layout_swap_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_inflation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_random_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_generator_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_volume_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_argmax_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_custom_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_scan_sycl ${STD_CXX_FLAG})
set(EIGEN_SYCL OFF)
endif()
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_meta)
ei_add_test(cxx11_maxsizevector)
ei_add_test(cxx11_tensor_argmax)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_block_access)
ei_add_test(cxx11_tensor_block_eval)
ei_add_test(cxx11_tensor_block_io)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_concatenation)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_image_patch)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_inflation)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_morphing)
ei_add_test(cxx11_tensor_move)
ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_padding)
ei_add_test(cxx11_tensor_patch)
ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_reduction)
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_scan)
ei_add_test(cxx11_tensor_shuffling)
ei_add_test(cxx11_tensor_simple)
ei_add_test(cxx11_tensor_striding)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_trace)
ei_add_test(cxx11_tensor_volume_patch)
# ei_add_test(cxx11_tensor_symmetry)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# This test requires __uint128_t, which is only available on 64-bit systems
ei_add_test(cxx11_tensor_uint128)
endif()
endif()
# These tests need nvcc
find_package(CUDA 7.0)
if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags, since they trigger thousands of compilation warnings
# in the CUDA runtime.
# Also remove -ansi, which is incompatible with std=c++11.
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${ARCH}")
endforeach()
endif()
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
if (${CUDA_VERSION} STREQUAL "7.0")
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
endif()
set(NVCC_ARCH_FLAGS)
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_RELAXED_CONSTEXPR} -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(cxx11_tensor_complex_gpu)
ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
set(EIGEN_CUDA_OLDEST_COMPUTE_ARCH 9999)
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
if(${ARCH} LESS ${EIGEN_CUDA_OLDEST_COMPUTE_ARCH})
set(EIGEN_CUDA_OLDEST_COMPUTE_ARCH ${ARCH})
endif()
endforeach()
# Contractions require arch 3.0 or higher
if (${EIGEN_CUDA_OLDEST_COMPUTE_ARCH} GREATER 29)
ei_add_test(cxx11_tensor_device)
ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
endif()
# The random number generation code requires arch 3.5 or greater.
if (${EIGEN_CUDA_OLDEST_COMPUTE_ARCH} GREATER 34)
ei_add_test(cxx11_tensor_random_gpu)
endif()
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
# Add HIP specific tests
if (EIGEN_TEST_HIP)
set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.")
if (EXISTS ${HIP_PATH})
list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)
find_package(HIP REQUIRED)
if (HIP_FOUND)
execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM)
if ((${HIP_PLATFORM} STREQUAL "hcc") OR (${HIP_PLATFORM} STREQUAL "amd"))
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${HIP_PATH}/include)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
#
# the complex datatype is not yet supported by HIP,
# so those tests are left out for now
#
# ei_add_test(cxx11_tensor_complex_gpu)
# ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
#
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
ei_add_test(cxx11_tensor_device)
ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
ei_add_test(cxx11_tensor_random_gpu)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia"))
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
endif()
endif()
else ()
message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist")
endif()
endif()

View File

@@ -0,0 +1,296 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/EulerAngles>
using namespace Eigen;
// Unfortunately, we need to specialize it in order for it to work. (We could add it to the main.h test framework.)
template <typename Scalar, class System>
bool verifyIsApprox(const Eigen::EulerAngles<Scalar, System>& a, const Eigen::EulerAngles<Scalar, System>& b)
{
return verifyIsApprox(a.angles(), b.angles());
}
// Verify that x is in the approximate range [a, b]
#define VERIFY_APPROXED_RANGE(a, x, b) \
do { \
VERIFY_IS_APPROX_OR_LESS_THAN(a, x); \
VERIFY_IS_APPROX_OR_LESS_THAN(x, b); \
} while(0)
const char X = EULER_X;
const char Y = EULER_Y;
const char Z = EULER_Z;
template<typename Scalar, class EulerSystem>
void verify_euler(const EulerAngles<Scalar, EulerSystem>& e)
{
typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType;
typedef Matrix<Scalar,3,3> Matrix3;
typedef Matrix<Scalar,3,1> Vector3;
typedef Quaternion<Scalar> QuaternionType;
typedef AngleAxis<Scalar> AngleAxisType;
const Scalar ONE = Scalar(1);
const Scalar HALF_PI = Scalar(EIGEN_PI / 2);
const Scalar PI = Scalar(EIGEN_PI);
// It is very important to calculate the acceptable precision depending on the distance from the pole.
const Scalar longitudeRadius = std::abs(
EulerSystem::IsTaitBryan ?
std::cos(e.beta()) :
std::sin(e.beta())
);
Scalar precision = test_precision<Scalar>() / longitudeRadius;
Scalar betaRangeStart, betaRangeEnd;
if (EulerSystem::IsTaitBryan)
{
betaRangeStart = -HALF_PI;
betaRangeEnd = HALF_PI;
}
else
{
if (!EulerSystem::IsBetaOpposite)
{
betaRangeStart = 0;
betaRangeEnd = PI;
}
else
{
betaRangeStart = -PI;
betaRangeEnd = 0;
}
}
const Vector3 I_ = EulerAnglesType::AlphaAxisVector();
const Vector3 J_ = EulerAnglesType::BetaAxisVector();
const Vector3 K_ = EulerAnglesType::GammaAxisVector();
// Is approx checks
VERIFY(e.isApprox(e));
VERIFY_IS_APPROX(e, e);
VERIFY_IS_NOT_APPROX(e, EulerAnglesType(e.alpha() + ONE, e.beta() + ONE, e.gamma() + ONE));
const Matrix3 m(e);
VERIFY_IS_APPROX(Scalar(m.determinant()), ONE);
EulerAnglesType ebis(m);
// When there is no roll (acting like a polar representation), we have the best precision.
// One such case is when the Euler angles are at the pole; because it is a singular case,
// the computation returns no roll.
if (ebis.beta() == 0)
precision = test_precision<Scalar>();
// Check that ebis is in range
VERIFY_APPROXED_RANGE(-PI, ebis.alpha(), PI);
VERIFY_APPROXED_RANGE(betaRangeStart, ebis.beta(), betaRangeEnd);
VERIFY_APPROXED_RANGE(-PI, ebis.gamma(), PI);
const Matrix3 mbis(AngleAxisType(ebis.alpha(), I_) * AngleAxisType(ebis.beta(), J_) * AngleAxisType(ebis.gamma(), K_));
VERIFY_IS_APPROX(Scalar(mbis.determinant()), ONE);
VERIFY_IS_APPROX(mbis, ebis.toRotationMatrix());
/*std::cout << "===================\n" <<
"e: " << e << std::endl <<
"eabis: " << eabis.transpose() << std::endl <<
"m: " << m << std::endl <<
"mbis: " << mbis << std::endl <<
"X: " << (m * Vector3::UnitX()).transpose() << std::endl <<
"X: " << (mbis * Vector3::UnitX()).transpose() << std::endl;*/
VERIFY(m.isApprox(mbis, precision));
// Test if e and ebis are the same.
// Need to check both singular and non-singular cases
// There are two singular cases.
// 1. When I==K and sin(ea(1)) == 0
// 2. When I!=K and cos(ea(1)) == 0
// TODO: Make this test work well, and use range saturation function.
/*// If I==K and ea[1]==0, then there is no unique solution.
// The same remark applies in the case where I!=K and |ea[1]| is close to +-pi/2.
if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
VERIFY_IS_APPROX(ea, eabis);*/
// Quaternions
const QuaternionType q(e);
ebis = q;
const QuaternionType qbis(ebis);
VERIFY(internal::isApprox<Scalar>(std::abs(q.dot(qbis)), ONE, precision));
//VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same
// A suggestion for a simple product test, for when products are supported.
/*EulerAnglesType e2(PI/2, PI/2, PI/2);
Matrix3 m2(e2);
VERIFY_IS_APPROX(e*e2, m*m2);*/
}
template<signed char A, signed char B, signed char C, typename Scalar>
void verify_euler_vec(const Matrix<Scalar,3,1>& ea)
{
verify_euler(EulerAngles<Scalar, EulerSystem<A, B, C> >(ea[0], ea[1], ea[2]));
}
template<signed char A, signed char B, signed char C, typename Scalar>
void verify_euler_all_neg(const Matrix<Scalar,3,1>& ea)
{
verify_euler_vec<+A,+B,+C>(ea);
verify_euler_vec<+A,+B,-C>(ea);
verify_euler_vec<+A,-B,+C>(ea);
verify_euler_vec<+A,-B,-C>(ea);
verify_euler_vec<-A,+B,+C>(ea);
verify_euler_vec<-A,+B,-C>(ea);
verify_euler_vec<-A,-B,+C>(ea);
verify_euler_vec<-A,-B,-C>(ea);
}
template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
{
verify_euler_all_neg<X,Y,Z>(ea);
verify_euler_all_neg<X,Y,X>(ea);
verify_euler_all_neg<X,Z,Y>(ea);
verify_euler_all_neg<X,Z,X>(ea);
verify_euler_all_neg<Y,Z,X>(ea);
verify_euler_all_neg<Y,Z,Y>(ea);
verify_euler_all_neg<Y,X,Z>(ea);
verify_euler_all_neg<Y,X,Y>(ea);
verify_euler_all_neg<Z,X,Y>(ea);
verify_euler_all_neg<Z,X,Z>(ea);
verify_euler_all_neg<Z,Y,X>(ea);
verify_euler_all_neg<Z,Y,Z>(ea);
}
template<typename Scalar> void check_singular_cases(const Scalar& singularBeta)
{
typedef Matrix<Scalar,3,1> Vector3;
const Scalar PI = Scalar(EIGEN_PI);
for (Scalar epsilon = NumTraits<Scalar>::epsilon(); epsilon < 1; epsilon *= Scalar(1.2))
{
check_all_var(Vector3(PI/4, singularBeta, PI/3));
check_all_var(Vector3(PI/4, singularBeta - epsilon, PI/3));
check_all_var(Vector3(PI/4, singularBeta - Scalar(1.5)*epsilon, PI/3));
check_all_var(Vector3(PI/4, singularBeta - 2*epsilon, PI/3));
check_all_var(Vector3(PI*Scalar(0.8), singularBeta - epsilon, Scalar(0.9)*PI));
check_all_var(Vector3(PI*Scalar(-0.9), singularBeta + epsilon, PI*Scalar(0.3)));
check_all_var(Vector3(PI*Scalar(-0.6), singularBeta + Scalar(1.5)*epsilon, PI*Scalar(0.3)));
check_all_var(Vector3(PI*Scalar(-0.5), singularBeta + 2*epsilon, PI*Scalar(0.4)));
check_all_var(Vector3(PI*Scalar(0.9), singularBeta + epsilon, Scalar(0.8)*PI));
}
// This one is for sanity; it used to have a problem with near-pole cases for float scalars.
check_all_var(Vector3(PI*Scalar(0.8), singularBeta - Scalar(1E-6), Scalar(0.9)*PI));
}
template<typename Scalar> void eulerangles_manual()
{
typedef Matrix<Scalar,3,1> Vector3;
typedef Matrix<Scalar,Dynamic,1> VectorX;
const Vector3 Zero = Vector3::Zero();
const Scalar PI = Scalar(EIGEN_PI);
check_all_var(Zero);
// singular cases
check_singular_cases(PI/2);
check_singular_cases(-PI/2);
check_singular_cases(Scalar(0));
check_singular_cases(Scalar(-0));
check_singular_cases(PI);
check_singular_cases(-PI);
// non-singular cases
VectorX alpha = VectorX::LinSpaced(20, Scalar(-0.99) * PI, PI);
VectorX beta = VectorX::LinSpaced(20, Scalar(-0.49) * PI, Scalar(0.49) * PI);
VectorX gamma = VectorX::LinSpaced(20, Scalar(-0.99) * PI, PI);
for (int i = 0; i < alpha.size(); ++i) {
for (int j = 0; j < beta.size(); ++j) {
for (int k = 0; k < gamma.size(); ++k) {
check_all_var(Vector3(alpha(i), beta(j), gamma(k)));
}
}
}
}
template<typename Scalar> void eulerangles_rand()
{
typedef Matrix<Scalar,3,3> Matrix3;
typedef Matrix<Scalar,3,1> Vector3;
typedef Array<Scalar,3,1> Array3;
typedef Quaternion<Scalar> Quaternionx;
typedef AngleAxis<Scalar> AngleAxisType;
Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI));
Quaternionx q1;
q1 = AngleAxisType(a, Vector3::Random().normalized());
Matrix3 m;
m = q1;
Vector3 ea = m.eulerAngles(0,1,2);
check_all_var(ea);
ea = m.eulerAngles(0,1,0);
check_all_var(ea);
// Check with purely random Quaternion:
q1.coeffs() = Quaternionx::Coefficients::Random().normalized();
m = q1;
ea = m.eulerAngles(0,1,2);
check_all_var(ea);
ea = m.eulerAngles(0,1,0);
check_all_var(ea);
// Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi].
ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1);
check_all_var(ea);
ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
check_all_var(ea);
ea[0] = ea[1] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
check_all_var(ea);
ea[1] = 0;
check_all_var(ea);
ea.head(2).setZero();
check_all_var(ea);
ea.setZero();
check_all_var(ea);
}
EIGEN_DECLARE_TEST(EulerAngles)
{
// Simple cast test
EulerAnglesXYZd onesEd(1, 1, 1);
EulerAnglesXYZf onesEf = onesEd.cast<float>();
VERIFY_IS_APPROX(onesEd, onesEf.cast<double>());
// Simple Construction from Vector3 test
VERIFY_IS_APPROX(onesEd, EulerAnglesXYZd(Vector3d::Ones()));
CALL_SUBTEST_1( eulerangles_manual<float>() );
CALL_SUBTEST_2( eulerangles_manual<double>() );
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_3( eulerangles_rand<float>() );
CALL_SUBTEST_4( eulerangles_rand<double>() );
}
// TODO: Add tests for auto diff
// TODO: Add tests for complex numbers
}
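A minimal sketch of the round trip verify_euler() checks above, assuming the same headers and test harness:
Matrix3d R = (AngleAxisd(0.3, Vector3d::UnitZ())
            * AngleAxisd(0.2, Vector3d::UnitY())
            * AngleAxisd(0.1, Vector3d::UnitX())).toRotationMatrix();
EulerAnglesZYXd e(R);                     // extract alpha (about Z), beta (about Y), gamma (about X)
VERIFY(R.isApprox(e.toRotationMatrix())); // the same rotation is recovered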

View File

@@ -0,0 +1,2 @@
// Re-run the FFTW.cpp test cases under the test name "FFT": the define below
// renames the generated test entry point (test_FFTW -> test_FFT), and since
// EIGEN_FFTW_DEFAULT is not defined here, the cases exercise Eigen's built-in
// kissfft backend instead of FFTW.
#define test_FFTW test_FFT
#include "FFTW.cpp"

View File

@@ -0,0 +1,262 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/FFT>
template <typename T>
std::complex<T> RandomCpx() { return std::complex<T>( (T)(rand()/(T)RAND_MAX - .5), (T)(rand()/(T)RAND_MAX - .5) ); }
using namespace std;
using namespace Eigen;
template < typename T>
complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
complex<long double> promote(float x) { return complex<long double>((long double)x); }
complex<long double> promote(double x) { return complex<long double>((long double)x); }
complex<long double> promote(long double x) { return complex<long double>((long double)x); }
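// fft_rmse recomputes each DFT bin by brute force,
//   acc(k0) = sum_{k1} timebuf[k1] * exp(-2*pi*i * k0*k1 / N),
// and returns the relative RMS error sqrt(sum|acc - fftbuf|^2 / sum|acc|^2)
// of the FFT output against that reference.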
template <typename VT1,typename VT2>
long double fft_rmse( const VT1 & fftbuf,const VT2 & timebuf)
{
long double totalpower=0;
long double difpower=0;
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
totalpower += numext::abs2(acc);
complex<long double> x = promote(fftbuf[k0]);
complex<long double> dif = acc - x;
difpower += numext::abs2(dif);
//cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
}
cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
return sqrt(difpower/totalpower);
}
template <typename VT1,typename VT2>
long double dif_rmse( const VT1 buf1,const VT2 buf2)
{
long double totalpower=0;
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}
enum { StdVectorContainer, EigenVectorContainer };
template<int Container, typename Scalar> struct VectorType;
template<typename Scalar> struct VectorType<StdVectorContainer,Scalar>
{
typedef vector<Scalar> type;
};
template<typename Scalar> struct VectorType<EigenVectorContainer,Scalar>
{
typedef Matrix<Scalar,Dynamic,1> type;
};
template <int Container, typename T>
void test_scalar_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename FFT<T>::Scalar Scalar;
typedef typename VectorType<Container,Scalar>::type ScalarVector;
typedef typename VectorType<Container,Complex>::type ComplexVector;
FFT<T> fft;
ScalarVector tbuf(nfft);
ComplexVector freqBuf;
for (int k=0;k<nfft;++k)
tbuf[k]= (T)( rand()/(double)RAND_MAX - .5);
// make sure it DOESN'T give the right full spectrum answer
// if we've asked for half-spectrum
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT
ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ScalarVector tbuf3;
fft.SetFlag(fft.Unscaled);
fft.inv( tbuf3 , freqBuf);
for (int k=0;k<nfft;++k)
tbuf3[k] *= T(1./nfft);
//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;
VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}
template <typename T>
void test_scalar(int nfft)
{
test_scalar_generic<StdVectorContainer,T>(nfft);
//test_scalar_generic<EigenVectorContainer,T>(nfft);
}
template <int Container, typename T>
void test_complex_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename VectorType<Container,Complex>::type ComplexVector;
FFT<T> fft;
ComplexVector inbuf(nfft);
ComplexVector outbuf;
ComplexVector buf3;
for (int k=0;k<nfft;++k)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);
VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);
VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
fft.SetFlag(fft.Unscaled);
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}
template <typename T>
void test_complex(int nfft)
{
test_complex_generic<StdVectorContainer,T>(nfft);
test_complex_generic<EigenVectorContainer,T>(nfft);
}
/*
template <typename T,int nrows,int ncols>
void test_complex2d()
{
typedef typename Eigen::FFT<T>::Complex Complex;
FFT<T> fft;
Eigen::Matrix<Complex,nrows,ncols> src,src2,dst,dst2;
src = Eigen::Matrix<Complex,nrows,ncols>::Random();
//src = Eigen::Matrix<Complex,nrows,ncols>::Identity();
for (int k=0;k<ncols;k++) {
Eigen::Matrix<Complex,nrows,1> tmpOut;
fft.fwd( tmpOut,src.col(k) );
dst2.col(k) = tmpOut;
}
for (int k=0;k<nrows;k++) {
Eigen::Matrix<Complex,1,ncols> tmpOut;
fft.fwd( tmpOut, dst2.row(k) );
dst2.row(k) = tmpOut;
}
fft.fwd2(dst.data(),src.data(),ncols,nrows);
fft.inv2(src2.data(),dst.data(),ncols,nrows);
VERIFY( (src-src2).norm() < test_precision<T>() );
VERIFY( (dst-dst2).norm() < test_precision<T>() );
}
*/
void test_return_by_value(int len)
{
VectorXf in;
VectorXf in1;
in.setRandom( len );
VectorXcf out1,out2;
FFT<float> fft;
fft.SetFlag(fft.HalfSpectrum );
fft.fwd(out1,in);
out2 = fft.fwd(in);
VERIFY( (out1-out2).norm() < test_precision<float>() );
in1 = fft.inv(out1);
VERIFY( (in1-in).norm() < test_precision<float>() );
}
EIGEN_DECLARE_TEST(FFTW)
{
CALL_SUBTEST( test_return_by_value(32) );
//CALL_SUBTEST( ( test_complex2d<float,4,8> () ) ); CALL_SUBTEST( ( test_complex2d<double,4,8> () ) );
//CALL_SUBTEST( ( test_complex2d<long double,4,8> () ) );
CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) );
CALL_SUBTEST( test_complex<float>(256) ); CALL_SUBTEST( test_complex<double>(256) );
CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) );
CALL_SUBTEST( test_complex<float>(5*32) ); CALL_SUBTEST( test_complex<double>(5*32) );
CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) );
CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) );
CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) );
CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) );
CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) );
CALL_SUBTEST( test_scalar<float>(50) ); CALL_SUBTEST( test_scalar<double>(50) );
CALL_SUBTEST( test_scalar<float>(256) ); CALL_SUBTEST( test_scalar<double>(256) );
CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) );
#ifdef EIGEN_HAS_FFTWL
CALL_SUBTEST( test_complex<long double>(32) );
CALL_SUBTEST( test_complex<long double>(256) );
CALL_SUBTEST( test_complex<long double>(3*8) );
CALL_SUBTEST( test_complex<long double>(5*32) );
CALL_SUBTEST( test_complex<long double>(2*3*4) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
CALL_SUBTEST( test_scalar<long double>(32) );
CALL_SUBTEST( test_scalar<long double>(45) );
CALL_SUBTEST( test_scalar<long double>(50) );
CALL_SUBTEST( test_scalar<long double>(256) );
CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
#endif
}
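A minimal sketch of the round trip these tests verify, assuming the same FFT header and test harness:
FFT<float> fft;
VectorXf time = VectorXf::Random(64);
VectorXcf freq;
fft.fwd(freq, time);  // forward transform (full spectrum by default)
VectorXf back;
fft.inv(back, freq);  // inverse transform, rescaled unless Unscaled is set
VERIFY((time - back).norm() < test_precision<float>());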

File diff suppressed because it is too large

View File

@@ -0,0 +1,114 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <stdio.h>
#include "main.h"
#include <unsupported/Eigen/NumericalDiff>
// Generic functor
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
struct Functor
{
typedef _Scalar Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
};
typedef Matrix<Scalar,InputsAtCompileTime,1> InputType;
typedef Matrix<Scalar,ValuesAtCompileTime,1> ValueType;
typedef Matrix<Scalar,ValuesAtCompileTime,InputsAtCompileTime> JacobianType;
int m_inputs, m_values;
Functor() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {}
Functor(int inputs_, int values_) : m_inputs(inputs_), m_values(values_) {}
int inputs() const { return m_inputs; }
int values() const { return m_values; }
};
struct my_functor : Functor<double>
{
my_functor(void): Functor<double>(3,15) {}
int operator()(const VectorXd &x, VectorXd &fvec) const
{
double tmp1, tmp2, tmp3;
double y[15] = {1.4e-1, 1.8e-1, 2.2e-1, 2.5e-1, 2.9e-1, 3.2e-1, 3.5e-1,
3.9e-1, 3.7e-1, 5.8e-1, 7.3e-1, 9.6e-1, 1.34, 2.1, 4.39};
for (int i = 0; i < values(); i++)
{
tmp1 = i+1;
tmp2 = 16 - i - 1;
tmp3 = (i>=8)? tmp2 : tmp1;
fvec[i] = y[i] - (x[0] + tmp1/(x[1]*tmp2 + x[2]*tmp3));
}
return 0;
}
int actual_df(const VectorXd &x, MatrixXd &fjac) const
{
double tmp1, tmp2, tmp3, tmp4;
for (int i = 0; i < values(); i++)
{
tmp1 = i+1;
tmp2 = 16 - i - 1;
tmp3 = (i>=8)? tmp2 : tmp1;
tmp4 = (x[1]*tmp2 + x[2]*tmp3); tmp4 = tmp4*tmp4;
fjac(i,0) = -1;
fjac(i,1) = tmp1*tmp2/tmp4;
fjac(i,2) = tmp1*tmp3/tmp4;
}
return 0;
}
};
void test_forward()
{
VectorXd x(3);
MatrixXd jac(15,3);
MatrixXd actual_jac(15,3);
my_functor functor;
x << 0.082, 1.13, 2.35;
// real one
functor.actual_df(x, actual_jac);
// std::cout << actual_jac << std::endl << std::endl;
// using NumericalDiff
NumericalDiff<my_functor> numDiff(functor);
numDiff.df(x, jac);
// std::cout << jac << std::endl;
VERIFY_IS_APPROX(jac, actual_jac);
}
void test_central()
{
VectorXd x(3);
MatrixXd jac(15,3);
MatrixXd actual_jac(15,3);
my_functor functor;
x << 0.082, 1.13, 2.35;
// real one
functor.actual_df(x, actual_jac);
// using NumericalDiff
NumericalDiff<my_functor,Central> numDiff(functor);
numDiff.df(x, jac);
VERIFY_IS_APPROX(jac, actual_jac);
}
EIGEN_DECLARE_TEST(NumericalDiff)
{
CALL_SUBTEST(test_forward());
CALL_SUBTEST(test_central());
}
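A minimal sketch of the wrapper usage tested above, assuming my_functor from this file; Forward approximates f'(x) as (f(x+h)-f(x))/h (error O(h)), while Central uses (f(x+h)-f(x-h))/(2h) (error O(h^2)):
my_functor functor;
NumericalDiff<my_functor, Central> diff(functor);
VectorXd x(3);
x << 0.082, 1.13, 2.35;
MatrixXd J(15, 3);
diff.df(x, J); // J now holds the centrally-differenced Jacobian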

View File

@@ -0,0 +1,87 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/AlignedVector3>
namespace Eigen {
template<typename T,typename Derived>
T test_relative_error(const AlignedVector3<T> &a, const MatrixBase<Derived> &b)
{
return test_relative_error(a.coeffs().template head<3>(), b);
}
}
template<typename Scalar>
void alignedvector3()
{
Scalar s1 = internal::random<Scalar>();
Scalar s2 = internal::random<Scalar>();
typedef Matrix<Scalar,3,1> RefType;
typedef Matrix<Scalar,3,3> Mat33;
typedef AlignedVector3<Scalar> FastType;
RefType r1(RefType::Random()), r2(RefType::Random()), r3(RefType::Random()),
r4(RefType::Random()), r5(RefType::Random());
FastType f1(r1), f2(r2), f3(r3), f4(r4), f5(r5);
Mat33 m1(Mat33::Random());
VERIFY_IS_APPROX(f1,r1);
VERIFY_IS_APPROX(f4,r4);
VERIFY_IS_APPROX(f4+f1,r4+r1);
VERIFY_IS_APPROX(f4-f1,r4-r1);
VERIFY_IS_APPROX(f4+f1-f2,r4+r1-r2);
VERIFY_IS_APPROX(f4+=f3,r4+=r3);
VERIFY_IS_APPROX(f4-=f5,r4-=r5);
VERIFY_IS_APPROX(f4-=f5+f1,r4-=r5+r1);
VERIFY_IS_APPROX(f5+f1-s1*f2,r5+r1-s1*r2);
VERIFY_IS_APPROX(f5+f1/s2-s1*f2,r5+r1/s2-s1*r2);
VERIFY_IS_APPROX(m1*f4,m1*r4);
VERIFY_IS_APPROX(f4.transpose()*m1,r4.transpose()*m1);
VERIFY_IS_APPROX(f2.dot(f3),r2.dot(r3));
VERIFY_IS_APPROX(f2.cross(f3),r2.cross(r3));
VERIFY_IS_APPROX(f2.norm(),r2.norm());
VERIFY_IS_APPROX(f2.normalized(),r2.normalized());
VERIFY_IS_APPROX((f2+f1).normalized(),(r2+r1).normalized());
f2.normalize();
r2.normalize();
VERIFY_IS_APPROX(f2,r2);
{
FastType f6 = RefType::Zero();
FastType f7 = FastType::Zero();
VERIFY_IS_APPROX(f6,f7);
f6 = r4+r1;
VERIFY_IS_APPROX(f6,r4+r1);
f6 -= Scalar(2)*r4;
VERIFY_IS_APPROX(f6,r1-r4);
}
FastType f8, f9(0,0,0);
VERIFY_IS_APPROX(f9-f1,-f1);
std::stringstream ss1, ss2;
ss1 << f1;
ss2 << r1;
VERIFY(ss1.str()==ss2.str());
}
EIGEN_DECLARE_TEST(alignedvector3)
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( alignedvector3<float>() );
}
}
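For context, a short sketch under the same headers: AlignedVector3 pads a 3-vector with one unused scalar so arithmetic can use full 4-wide SIMD packets, while the type still mixes with ordinary 3-vectors in expressions, which is what the checks above exercise:
AlignedVector3<float> a(1.f, 2.f, 3.f), b(4.f, 5.f, 6.f);
AlignedVector3<float> c = a + b;  // vectorized over all 4 lanes
float d = a.dot(b);               // the padding lane is kept at zero by construction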

View File

@@ -0,0 +1,387 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/AutoDiff>
template<typename Scalar>
EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y)
{
using namespace std;
// return x+std::sin(y);
EIGEN_ASM_COMMENT("mybegin");
// pow(float, int) promotes to pow(double, double)
return x*2 - 1 + static_cast<Scalar>(pow(1+x,2)) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(Scalar(-0.5)*x*x+0);
//return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2;
EIGEN_ASM_COMMENT("myend");
}
template<typename Vector>
EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
{
typedef typename Vector::Scalar Scalar;
return (p-Vector(Scalar(-1),Scalar(1.))).norm() + (p.array() * p.array()).sum() + p.dot(p);
}
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
struct TestFunc1
{
typedef _Scalar Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
};
typedef Matrix<Scalar,InputsAtCompileTime,1> InputType;
typedef Matrix<Scalar,ValuesAtCompileTime,1> ValueType;
typedef Matrix<Scalar,ValuesAtCompileTime,InputsAtCompileTime> JacobianType;
int m_inputs, m_values;
TestFunc1() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {}
TestFunc1(int inputs_, int values_) : m_inputs(inputs_), m_values(values_) {}
int inputs() const { return m_inputs; }
int values() const { return m_values; }
template<typename T>
void operator() (const Matrix<T,InputsAtCompileTime,1>& x, Matrix<T,ValuesAtCompileTime,1>* _v) const
{
Matrix<T,ValuesAtCompileTime,1>& v = *_v;
v[0] = 2 * x[0] * x[0] + x[0] * x[1];
v[1] = 3 * x[1] * x[0] + 0.5 * x[1] * x[1];
if(inputs()>2)
{
v[0] += 0.5 * x[2];
v[1] += x[2];
}
if(values()>2)
{
v[2] = 3 * x[1] * x[0] * x[0];
}
if (inputs()>2 && values()>2)
v[2] *= x[2];
}
void operator() (const InputType& x, ValueType* v, JacobianType* _j) const
{
(*this)(x, v);
if(_j)
{
JacobianType& j = *_j;
j(0,0) = 4 * x[0] + x[1];
j(1,0) = 3 * x[1];
j(0,1) = x[0];
j(1,1) = 3 * x[0] + 2 * 0.5 * x[1];
if (inputs()>2)
{
j(0,2) = 0.5;
j(1,2) = 1;
}
if(values()>2)
{
j(2,0) = 3 * x[1] * 2 * x[0];
j(2,1) = 3 * x[0] * x[0];
}
if (inputs()>2 && values()>2)
{
j(2,0) *= x[2];
j(2,1) *= x[2];
j(2,2) = 3 * x[1] * x[0] * x[0];
}
}
}
};
#if EIGEN_HAS_VARIADIC_TEMPLATES
/* Test functor for the C++11 features. */
template <typename Scalar>
struct integratorFunctor
{
typedef Matrix<Scalar, 2, 1> InputType;
typedef Matrix<Scalar, 2, 1> ValueType;
/*
* Implementation starts here.
*/
integratorFunctor(const Scalar gain) : _gain(gain) {}
integratorFunctor(const integratorFunctor& f) : _gain(f._gain) {}
const Scalar _gain;
template <typename T1, typename T2>
void operator() (const T1 &input, T2 *output, const Scalar dt) const
{
T2 &o = *output;
/* Integrator to test the AD. */
o[0] = input[0] + input[1] * dt * _gain;
o[1] = input[1] * _gain;
}
/* Only needed for the test */
template <typename T1, typename T2, typename T3>
void operator() (const T1 &input, T2 *output, T3 *jacobian, const Scalar dt) const
{
T2 &o = *output;
/* Integrator to test the AD. */
o[0] = input[0] + input[1] * dt * _gain;
o[1] = input[1] * _gain;
if (jacobian)
{
T3 &j = *jacobian;
j(0, 0) = 1;
j(0, 1) = dt * _gain;
j(1, 0) = 0;
j(1, 1) = _gain;
}
}
};
template<typename Func> void forward_jacobian_cpp11(const Func& f)
{
typedef typename Func::ValueType::Scalar Scalar;
typedef typename Func::ValueType ValueType;
typedef typename Func::InputType InputType;
typedef typename AutoDiffJacobian<Func>::JacobianType JacobianType;
InputType x = InputType::Random(InputType::RowsAtCompileTime);
ValueType y, yref;
JacobianType j, jref;
const Scalar dt = internal::random<double>();
jref.setZero();
yref.setZero();
f(x, &yref, &jref, dt);
//std::cerr << "y, yref, jref: " << "\n";
//std::cerr << y.transpose() << "\n\n";
//std::cerr << yref << "\n\n";
//std::cerr << jref << "\n\n";
AutoDiffJacobian<Func> autoj(f);
autoj(x, &y, &j, dt);
//std::cerr << "y j (via autodiff): " << "\n";
//std::cerr << y.transpose() << "\n\n";
//std::cerr << j << "\n\n";
VERIFY_IS_APPROX(y, yref);
VERIFY_IS_APPROX(j, jref);
}
#endif
template<typename Func> void forward_jacobian(const Func& f)
{
typename Func::InputType x = Func::InputType::Random(f.inputs());
typename Func::ValueType y(f.values()), yref(f.values());
typename Func::JacobianType j(f.values(),f.inputs()), jref(f.values(),f.inputs());
jref.setZero();
yref.setZero();
f(x,&yref,&jref);
// std::cerr << y.transpose() << "\n\n";;
// std::cerr << j << "\n\n";;
j.setZero();
y.setZero();
AutoDiffJacobian<Func> autoj(f);
autoj(x, &y, &j);
// std::cerr << y.transpose() << "\n\n";;
// std::cerr << j << "\n\n";;
VERIFY_IS_APPROX(y, yref);
VERIFY_IS_APPROX(j, jref);
}
// TODO also check actual derivatives!
template <int>
void test_autodiff_scalar()
{
Vector2f p = Vector2f::Random();
typedef AutoDiffScalar<Vector2f> AD;
AD ax(p.x(),Vector2f::UnitX());
AD ay(p.y(),Vector2f::UnitY());
AD res = foo<AD>(ax,ay);
VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y()));
}
// TODO also check actual derivatives!
template <int>
void test_autodiff_vector()
{
Vector2f p = Vector2f::Random();
typedef AutoDiffScalar<Vector2f> AD;
typedef Matrix<AD,2,1> VectorAD;
VectorAD ap = p.cast<AD>();
ap.x().derivatives() = Vector2f::UnitX();
ap.y().derivatives() = Vector2f::UnitY();
AD res = foo<VectorAD>(ap);
VERIFY_IS_APPROX(res.value(), foo(p));
}
template <int>
void test_autodiff_jacobian()
{
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,2,2>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,2,3>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) ));
#if EIGEN_HAS_VARIADIC_TEMPLATES
CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) ));
#endif
}
template <int>
void test_autodiff_hessian()
{
typedef AutoDiffScalar<VectorXd> AD;
typedef Matrix<AD,Eigen::Dynamic,1> VectorAD;
typedef AutoDiffScalar<VectorAD> ADD;
typedef Matrix<ADD,Eigen::Dynamic,1> VectorADD;
VectorADD x(2);
double s1 = internal::random<double>(), s2 = internal::random<double>(), s3 = internal::random<double>(), s4 = internal::random<double>();
x(0).value()=s1;
x(1).value()=s2;
//set unit vectors for the derivative directions (partial derivatives of the input vector)
x(0).derivatives().resize(2);
x(0).derivatives().setZero();
x(0).derivatives()(0)= 1;
x(1).derivatives().resize(2);
x(1).derivatives().setZero();
x(1).derivatives()(1)=1;
//repeat partial derivatives for the inner AutoDiffScalar
x(0).value().derivatives() = VectorXd::Unit(2,0);
x(1).value().derivatives() = VectorXd::Unit(2,1);
//set the hessian matrix to zero
for(int idx=0; idx<2; idx++) {
x(0).derivatives()(idx).derivatives() = VectorXd::Zero(2);
x(1).derivatives()(idx).derivatives() = VectorXd::Zero(2);
}
ADD y = sin(AD(s3)*x(0) + AD(s4)*x(1));
VERIFY_IS_APPROX(y.value().derivatives()(0), y.derivatives()(0).value());
VERIFY_IS_APPROX(y.value().derivatives()(1), y.derivatives()(1).value());
VERIFY_IS_APPROX(y.value().derivatives()(0), s3*std::cos(s1*s3+s2*s4));
VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4));
VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3));
VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4));
ADD z = x(0)*x(1);
VERIFY_IS_APPROX(z.derivatives()(0).derivatives(), Vector2d(0,1));
VERIFY_IS_APPROX(z.derivatives()(1).derivatives(), Vector2d(1,0));
}
double bug_1222() {
typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
const double _cv1_3 = 1.0;
const AD chi_3 = 1.0;
// this line did not work, because operator+ returns ADS<DerType&>, which then cannot be converted to ADS<DerType>
const AD denom = chi_3 + _cv1_3;
return denom.value();
}
#ifdef EIGEN_TEST_PART_5
double bug_1223() {
using std::min;
typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
const double _cv1_3 = 1.0;
const AD chi_3 = 1.0;
const AD denom = 1.0;
// this failed because the implementation of min attempts to construct ADS<DerType&> via the constructor AutoDiffScalar(const Real& value)
// without initializing m_derivatives (which is a reference in this case)
#define EIGEN_TEST_SPACE
const AD t = min EIGEN_TEST_SPACE (denom / chi_3, 1.0);
const AD t2 = min EIGEN_TEST_SPACE (denom / (chi_3 * _cv1_3), 1.0);
return t.value() + t2.value();
}
// regression test for some compilation issues with specializations of ScalarBinaryOpTraits
void bug_1260() {
Matrix4d A = Matrix4d::Ones();
Vector4d v = Vector4d::Ones();
A*v;
}
// check a compilation issue with numext::max
double bug_1261() {
typedef AutoDiffScalar<Matrix2d> AD;
typedef Matrix<AD,2,1> VectorAD;
VectorAD v(0.,0.);
const AD maxVal = v.maxCoeff();
const AD minVal = v.minCoeff();
return maxVal.value() + minVal.value();
}
double bug_1264() {
typedef AutoDiffScalar<Vector2d> AD;
const AD s = 0.;
const Matrix<AD, 3, 1> v1(0.,0.,0.);
const Matrix<AD, 3, 1> v2 = (s + 3.0) * v1;
return v2(0).value();
}
// check with expressions on constants
double bug_1281() {
int n = 2;
typedef AutoDiffScalar<VectorXd> AD;
const AD c = 1.;
AD x0(2,n,0);
AD y1 = (AD(c)+AD(c))*x0;
y1 = x0 * (AD(c)+AD(c));
AD y2 = (-AD(c))+x0;
y2 = x0+(-AD(c));
AD y3 = (AD(c)*(-AD(c))+AD(c))*x0;
y3 = x0 * (AD(c)*(-AD(c))+AD(c));
return (y1+y2+y3).value();
}
#endif
EIGEN_DECLARE_TEST(autodiff)
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( test_autodiff_scalar<1>() );
CALL_SUBTEST_2( test_autodiff_vector<1>() );
CALL_SUBTEST_3( test_autodiff_jacobian<1>() );
CALL_SUBTEST_4( test_autodiff_hessian<1>() );
}
CALL_SUBTEST_5( bug_1222() );
CALL_SUBTEST_5( bug_1223() );
CALL_SUBTEST_5( bug_1260() );
CALL_SUBTEST_5( bug_1261() );
CALL_SUBTEST_5( bug_1281() );
}
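A minimal sketch of the mechanism behind these tests, assuming the same AutoDiff header: AutoDiffScalar carries a value together with a derivative vector and propagates both through arithmetic:
typedef AutoDiffScalar<Matrix<double, 1, 1> > AD;
AD x(2.0, Matrix<double, 1, 1>::Ones()); // seed dx/dx = 1
AD y = x * x + 3.0 * x;                  // y = x^2 + 3x
// y.value() == 10 and y.derivatives()(0) == 2*x + 3 == 7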

View File

@@ -0,0 +1,101 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2013 Christoph Hertzberg <chtz@informatik.uni-bremen.de>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/AutoDiff>
/*
* In this file, scalar derivatives are tested for correctness.
* TODO add more tests!
*/
template<typename Scalar> void check_atan2()
{
typedef Matrix<Scalar, 1, 1> Deriv1;
typedef AutoDiffScalar<Deriv1> AD;
AD x(internal::random<Scalar>(-3.0, 3.0), Deriv1::UnitX());
using std::exp;
Scalar r = exp(internal::random<Scalar>(-10, 10));
AD s = sin(x), c = cos(x);
AD res = atan2(r*s, r*c);
VERIFY_IS_APPROX(res.value(), x.value());
VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
res = atan2(r*s+0, r*c+0);
VERIFY_IS_APPROX(res.value(), x.value());
VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
}
template<typename Scalar> void check_hyperbolic_functions()
{
using std::sinh;
using std::cosh;
using std::tanh;
typedef Matrix<Scalar, 1, 1> Deriv1;
typedef AutoDiffScalar<Deriv1> AD;
Deriv1 p = Deriv1::Random();
AD val(p.x(),Deriv1::UnitX());
Scalar cosh_px = std::cosh(p.x());
AD res1 = tanh(val);
VERIFY_IS_APPROX(res1.value(), std::tanh(p.x()));
VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(1.0) / (cosh_px * cosh_px));
AD res2 = sinh(val);
VERIFY_IS_APPROX(res2.value(), std::sinh(p.x()));
VERIFY_IS_APPROX(res2.derivatives().x(), cosh_px);
AD res3 = cosh(val);
VERIFY_IS_APPROX(res3.value(), cosh_px);
VERIFY_IS_APPROX(res3.derivatives().x(), std::sinh(p.x()));
// Check constant values.
const Scalar sample_point = Scalar(1) / Scalar(3);
val = AD(sample_point,Deriv1::UnitX());
res1 = tanh(val);
VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(0.896629559604914));
res2 = sinh(val);
VERIFY_IS_APPROX(res2.derivatives().x(), Scalar(1.056071867829939));
res3 = cosh(val);
VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150));
}
template <typename Scalar>
void check_limits_specialization()
{
typedef Eigen::Matrix<Scalar, 1, 1> Deriv;
typedef Eigen::AutoDiffScalar<Deriv> AD;
typedef std::numeric_limits<AD> A;
typedef std::numeric_limits<Scalar> B;
// workaround "unused typedef" warning:
VERIFY(!bool(internal::is_same<B, A>::value));
#if EIGEN_HAS_CXX11
VERIFY(bool(std::is_base_of<B, A>::value));
#endif
}
EIGEN_DECLARE_TEST(autodiff_scalar)
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( check_atan2<float>() );
CALL_SUBTEST_2( check_atan2<double>() );
CALL_SUBTEST_3( check_hyperbolic_functions<float>() );
CALL_SUBTEST_4( check_hyperbolic_functions<double>() );
CALL_SUBTEST_5( check_limits_specialization<double>());
}
}
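The hard-coded constants in check_hyperbolic_functions() follow from the standard identities at the sample point x = 1/3:
  tanh'(x) = 1/cosh(x)^2 = 0.896629559604914...
  sinh'(x) = cosh(x)     = 1.056071867829939...
  cosh'(x) = sinh(x)     = 0.339540557256150...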

View File

@@ -0,0 +1,370 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include "../Eigen/SpecialFunctions"
template<typename X, typename Y>
void verify_component_wise(const X& x, const Y& y)
{
for(Index i=0; i<x.size(); ++i)
{
if((numext::isfinite)(y(i))) {
VERIFY_IS_APPROX( x(i), y(i) );
}
else if((numext::isnan)(y(i)))
VERIFY((numext::isnan)(x(i)));
else
VERIFY_IS_EQUAL( x(i), y(i) );
}
}
template<typename ArrayType> void array_bessel_functions()
{
// Test Bessel function i0. Reference results obtained with SciPy.
{
ArrayType x(21);
ArrayType expected(21);
ArrayType res(21);
x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
expected << 4.35582826e+07, 6.21841242e+06, 8.93446228e+05, 1.29418563e+05,
1.89489253e+04, 2.81571663e+03, 4.27564116e+02, 6.72344070e+01,
1.13019220e+01, 2.27958530e+00, 1.00000000e+00, 2.27958530e+00,
1.13019220e+01, 6.72344070e+01, 4.27564116e+02, 2.81571663e+03,
1.89489253e+04, 1.29418563e+05, 8.93446228e+05, 6.21841242e+06,
4.35582826e+07;
CALL_SUBTEST(res = bessel_i0(x);
verify_component_wise(res, expected););
}
// Test Bessel function i0e. Reference results obtained with SciPy.
{
ArrayType x(21);
ArrayType expected(21);
ArrayType res(21);
x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
expected << 0.0897803118848, 0.0947062952128, 0.100544127361,
0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
0.0897803118848;
CALL_SUBTEST(res = bessel_i0e(x);
verify_component_wise(res, expected););
}
// Test Bessel function i1. Reference results obtained with SciPy.
{
ArrayType x(21);
ArrayType expected(21);
ArrayType res(21);
x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
expected << -4.24549734e+07, -6.04313324e+06, -8.65059436e+05, -1.24707259e+05,
-1.81413488e+04, -2.67098830e+03, -3.99873137e+02, -6.13419368e+01,
-9.75946515e+00, -1.59063685e+00, 0.00000000e+00, 1.59063685e+00,
9.75946515e+00, 6.13419368e+01, 3.99873137e+02, 2.67098830e+03,
1.81413488e+04, 1.24707259e+05, 8.65059436e+05, 6.04313324e+06,
4.24549734e+07;
CALL_SUBTEST(res = bessel_i1(x);
verify_component_wise(res, expected););
}
// Test Bessel function i1e. Reference results obtained with SciPy.
{
ArrayType x(21);
ArrayType expected(21);
ArrayType res(21);
x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
expected << -0.0875062221833, -0.092036796872, -0.0973496147565,
-0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
-0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
0.0875062221833;
CALL_SUBTEST(res = bessel_i1e(x);
verify_component_wise(res, expected););
}
// Test Bessel function j0. Reference results obtained with SciPy.
{
ArrayType x(77);
ArrayType expected(77);
ArrayType res(77);
x << -38., -37., -36., -35., -34., -33., -32., -31., -30.,
-29., -28., -27., -26., -25., -24., -23., -22., -21., -20., -19.,
-18., -17., -16., -15., -14., -13., -12., -11., -10., -9., -8.,
-7., -6., -5., -4., -3., -2., -1., 0., 1., 2., 3.,
4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36.,
37., 38.;
expected << 0.11433274, 0.01086237, -0.10556738,
-0.12684568, -0.03042119, 0.09727067, 0.13807901, 0.05120815,
-0.08636798, -0.14784876, -0.07315701, 0.07274192, 0.15599932,
0.09626678, -0.05623027, -0.16241278, -0.12065148, 0.03657907,
0.16702466, 0.14662944, -0.01335581, -0.16985425, -0.17489907,
-0.01422447, 0.17107348, 0.2069261 , 0.04768931, -0.1711903 ,
-0.24593576, -0.09033361, 0.17165081, 0.30007927, 0.15064526,
-0.17759677, -0.39714981, -0.26005195, 0.22389078, 0.76519769,
1. , 0.76519769, 0.22389078, -0.26005195, -0.39714981,
-0.17759677, 0.15064526, 0.30007927, 0.17165081, -0.09033361,
-0.24593576, -0.1711903 , 0.04768931, 0.2069261 , 0.17107348,
-0.01422447, -0.17489907, -0.16985425, -0.01335581, 0.14662944,
0.16702466, 0.03657907, -0.12065148, -0.16241278, -0.05623027,
0.09626678, 0.15599932, 0.07274192, -0.07315701, -0.14784876,
-0.08636798, 0.05120815, 0.13807901, 0.09727067, -0.03042119,
-0.12684568, -0.10556738, 0.01086237, 0.11433274;
CALL_SUBTEST(res = bessel_j0(x);
verify_component_wise(res, expected););
}
// Test Bessel function j1. Reference results obtained with SciPy.
{
ArrayType x(81);
ArrayType expected(81);
ArrayType res(81);
x << -40., -39., -38., -37., -36., -35., -34., -33., -32., -31., -30.,
-29., -28., -27., -26., -25., -24., -23., -22., -21., -20., -19.,
-18., -17., -16., -15., -14., -13., -12., -11., -10., -9., -8.,
-7., -6., -5., -4., -3., -2., -1., 0., 1., 2., 3.,
4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36.,
37., 38., 39., 40.;
expected << -0.12603832, -0.0640561 , 0.05916189, 0.13058004, 0.08232981,
-0.04399094, -0.13297118, -0.10061965, 0.02658903, 0.13302432,
0.11875106, -0.0069342 , -0.13055149, -0.13658472, -0.01504573,
0.12535025, 0.15403807, 0.03951932, -0.11717779, -0.17112027,
-0.06683312, 0.10570143, 0.18799489, 0.09766849, -0.09039718,
-0.20510404, -0.13337515, 0.07031805, 0.2234471 , 0.1767853 ,
-0.04347275, -0.24531179, -0.23463635, 0.00468282, 0.27668386,
0.32757914, 0.06604333, -0.33905896, -0.57672481, -0.44005059,
0. , 0.44005059, 0.57672481, 0.33905896, -0.06604333,
-0.32757914, -0.27668386, -0.00468282, 0.23463635, 0.24531179,
0.04347275, -0.1767853 , -0.2234471 , -0.07031805, 0.13337515,
0.20510404, 0.09039718, -0.09766849, -0.18799489, -0.10570143,
0.06683312, 0.17112027, 0.11717779, -0.03951932, -0.15403807,
-0.12535025, 0.01504573, 0.13658472, 0.13055149, 0.0069342 ,
-0.11875106, -0.13302432, -0.02658903, 0.10061965, 0.13297118,
0.04399094, -0.08232981, -0.13058004, -0.05916189, 0.0640561 ,
0.12603832;
CALL_SUBTEST(res = bessel_j1(x);
verify_component_wise(res, expected););
}
// Test Bessel function k0e. Reference results obtained with SciPy.
{
ArrayType x(42);
ArrayType expected(42);
ArrayType res(42);
x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
39., 40.;
expected << 1.97933385, 1.52410939, 1.14446308, 0.84156822,
0.6977616 , 0.60929767, 0.54780756, 0.50186313, 0.4658451 ,
0.43662302, 0.41229555, 0.39163193, 0.3737955 , 0.35819488,
0.34439865, 0.33208364, 0.32100235, 0.31096159, 0.30180802,
0.29341821, 0.28569149, 0.27854488, 0.2719092 , 0.26572635,
0.25994703, 0.25452917, 0.2494366 , 0.24463801, 0.24010616,
0.23581722, 0.23175022, 0.22788667, 0.22421014, 0.22070602,
0.21736123, 0.21416406, 0.21110397, 0.20817141, 0.20535778,
0.20265524, 0.20005668, 0.19755558;
CALL_SUBTEST(res = bessel_k0e(x);
verify_component_wise(res, expected););
}
// Test Bessel function k0. Reference results obtained with SciPy.
{
ArrayType x(42);
ArrayType expected(42);
ArrayType res(42);
x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
39., 40.;
expected << 1.54150675, 0.92441907, 4.21024438e-01, 1.13893873e-01,
3.47395044e-02, 1.11596761e-02, 3.69109833e-03, 1.24399433e-03,
4.24795742e-04, 1.46470705e-04, 5.08813130e-05, 1.77800623e-05,
6.24302055e-06, 2.20082540e-06, 7.78454386e-07, 2.76137082e-07,
9.81953648e-08, 3.49941166e-08, 1.24946640e-08, 4.46875334e-09,
1.60067129e-09, 5.74123782e-10, 2.06176797e-10, 7.41235161e-11,
2.66754511e-11, 9.60881878e-12, 3.46416156e-12, 1.24987740e-12,
4.51286453e-13, 1.63053459e-13, 5.89495073e-14, 2.13247750e-14,
7.71838266e-15, 2.79505752e-15, 1.01266123e-15, 3.67057597e-16,
1.33103515e-16, 4.82858338e-17, 1.75232770e-17, 6.36161716e-18,
2.31029936e-18, 8.39286110e-19;
CALL_SUBTEST(res = bessel_k0(x);
verify_component_wise(res, expected););
}
// Test Bessel function k1. Reference results obtained with SciPy.
{
ArrayType x(42);
ArrayType expected(42);
ArrayType res(42);
x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
39., 40.;
expected << 3.74702597, 1.65644112, 6.01907230e-01, 1.39865882e-01,
4.01564311e-02, 1.24834989e-02, 4.04461345e-03, 1.34391972e-03,
4.54182487e-04, 1.55369212e-04, 5.36370164e-05, 1.86487735e-05,
6.52086067e-06, 2.29075746e-06, 8.07858841e-07, 2.85834365e-07,
1.01417294e-07, 3.60715712e-08, 1.28570417e-08, 4.59124963e-09,
1.64226697e-09, 5.88305797e-10, 2.11029922e-10, 7.57898116e-11,
2.72493059e-11, 9.80699893e-12, 3.53277807e-12, 1.27369078e-12,
4.59568940e-13, 1.65940011e-13, 5.99574032e-14, 2.16773200e-14,
7.84189960e-15, 2.83839927e-15, 1.02789171e-15, 3.72416929e-16,
1.34991783e-16, 4.89519373e-17, 1.77585196e-17, 6.44478588e-18,
2.33973340e-18, 8.49713195e-19;
CALL_SUBTEST(res = bessel_k1(x);
verify_component_wise(res, expected););
}
// Test Bessel function k1e. Reference results obtained with SciPy.
{
ArrayType x(42);
ArrayType expected(42);
ArrayType res(42);
x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
39., 40.;
expected << 4.81127659, 2.73100971, 1.63615349, 1.03347685,
0.80656348, 0.68157595, 0.60027386, 0.54217591,
0.49807158, 0.46314909, 0.43462525, 0.41076657,
0.39043094, 0.37283175, 0.35740757, 0.34374563,
0.33153489, 0.32053597, 0.31056123, 0.30146131,
0.29311559, 0.2854255 , 0.27830958, 0.27169987,
0.26553913, 0.25977879, 0.25437733, 0.249299 ,
0.24451285, 0.23999191, 0.2357126 , 0.23165413,
0.22779816, 0.22412841, 0.22063036, 0.21729103,
0.21409878, 0.21104314, 0.20811462, 0.20530466,
0.20260547, 0.20000997;
CALL_SUBTEST(res = bessel_k1e(x);
verify_component_wise(res, expected););
}
// Test Bessel function y0. Reference results obtained with SciPy.
{
ArrayType x(42);
ArrayType expected(42);
ArrayType res(42);
x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
39., 40.;
expected << -0.93157302, -0.44451873, 0.08825696, 0.51037567, 0.37685001,
-0.01694074, -0.30851763, -0.28819468, -0.02594974, 0.22352149,
0.2499367 , 0.05567117, -0.16884732, -0.22523731, -0.07820786,
0.12719257, 0.2054643 , 0.095811 , -0.0926372 , -0.18755216,
-0.10951969, 0.0626406 , 0.17020176, 0.1198876 , -0.03598179,
-0.15283403, -0.12724943, 0.01204463, 0.13521498, 0.13183647,
0.00948116, -0.11729573, -0.13383266, -0.02874248, 0.09913483,
0.13340405, 0.04579799, -0.08085609, -0.13071488, -0.06066076,
0.06262353, 0.12593642;
CALL_SUBTEST(res = bessel_y0(x);
verify_component_wise(res, expected););
}
// Test Bessel function y1. Reference results obtained with SciPy.
{
ArrayType x(42);
ArrayType expected(42);
ArrayType res(42);
x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
39., 40.;
expected << -2.70410523, -1.47147239, -0.78121282, -0.10703243,
0.32467442, 0.39792571, 0.14786314, -0.17501034, -0.30266724,
-0.15806046, 0.10431458, 0.24901542, 0.16370554, -0.05709922,
-0.21008141, -0.16664484, 0.02107363, 0.17797517, 0.16720504,
0.00815513, -0.14956011, -0.16551161, -0.03253926, 0.12340586,
0.1616692 , 0.05305978, -0.09882996, -0.15579655, -0.07025124,
0.07552213, 0.14803412, 0.08442557, -0.05337283, -0.13854483,
-0.09578012, 0.03238588, 0.12751273, 0.10445477, -0.01262946,
-0.11514066, -0.11056411, -0.00579351;
CALL_SUBTEST(res = bessel_y1(x);
verify_component_wise(res, expected););
}
}
EIGEN_DECLARE_TEST(bessel_functions)
{
CALL_SUBTEST_1(array_bessel_functions<ArrayXf>());
CALL_SUBTEST_2(array_bessel_functions<ArrayXd>());
}

View File

@@ -0,0 +1,142 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include "main.h"
#include <Eigen/CXX11/ThreadPool>
// Visual studio doesn't implement a rand_r() function since its
// implementation of rand() is already thread safe
int rand_reentrant(unsigned int* s) {
#if EIGEN_COMP_MSVC_STRICT  // always defined (to 0 on non-MSVC), so test its value
EIGEN_UNUSED_VARIABLE(s);
return rand();
#else
return rand_r(s);
#endif
}
static void test_basic_eventcount()
{
MaxSizeVector<EventCount::Waiter> waiters(1);
waiters.resize(1);
EventCount ec(waiters);
EventCount::Waiter& w = waiters[0];
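  // EventCount waiting protocol: Prewait() announces the intent to block, the
  // caller then re-checks its wait predicate, and finally either CommitWait()
  // actually blocks or CancelWait() rolls the registration back.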
ec.Notify(false);
ec.Prewait();
ec.Notify(true);
ec.CommitWait(&w);
ec.Prewait();
ec.CancelWait();
}
// Fake bounded counter-based queue.
struct TestQueue {
std::atomic<int> val_;
static const int kQueueSize = 10;
TestQueue() : val_() {}
~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); }
bool Push() {
int val = val_.load(std::memory_order_relaxed);
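    // Classic CAS loop: on failure, compare_exchange_weak reloads 'val' with
    // the current value, so the loop retries without an explicit re-read.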
for (;;) {
VERIFY_GE(val, 0);
VERIFY_LE(val, kQueueSize);
if (val == kQueueSize) return false;
if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed))
return true;
}
}
bool Pop() {
int val = val_.load(std::memory_order_relaxed);
for (;;) {
VERIFY_GE(val, 0);
VERIFY_LE(val, kQueueSize);
if (val == 0) return false;
if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed))
return true;
}
}
bool Empty() { return val_.load(std::memory_order_relaxed) == 0; }
};
const int TestQueue::kQueueSize;
// A number of producers send messages to a set of consumers using a set of
// fake queues. Ensure that it does not crash, that consumers don't deadlock,
// and that the numbers of blocked and unblocked threads match.
static void test_stress_eventcount()
{
const int kThreads = std::thread::hardware_concurrency();
static const int kEvents = 1 << 16;
static const int kQueues = 10;
MaxSizeVector<EventCount::Waiter> waiters(kThreads);
waiters.resize(kThreads);
EventCount ec(waiters);
TestQueue queues[kQueues];
std::vector<std::unique_ptr<std::thread>> producers;
for (int i = 0; i < kThreads; i++) {
producers.emplace_back(new std::thread([&ec, &queues]() {
unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
for (int j = 0; j < kEvents; j++) {
unsigned idx = rand_reentrant(&rnd) % kQueues;
if (queues[idx].Push()) {
ec.Notify(false);
continue;
}
EIGEN_THREAD_YIELD();
j--;
}
}));
}
std::vector<std::unique_ptr<std::thread>> consumers;
for (int i = 0; i < kThreads; i++) {
consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() {
EventCount::Waiter& w = waiters[i];
unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
for (int j = 0; j < kEvents; j++) {
unsigned idx = rand_reentrant(&rnd) % kQueues;
if (queues[idx].Pop()) continue;
j--;
ec.Prewait();
bool empty = true;
for (int q = 0; q < kQueues; q++) {
if (!queues[q].Empty()) {
empty = false;
break;
}
}
if (!empty) {
ec.CancelWait();
continue;
}
ec.CommitWait(&w);
}
}));
}
for (int i = 0; i < kThreads; i++) {
producers[i]->join();
consumers[i]->join();
}
}
EIGEN_DECLARE_TEST(cxx11_eventcount)
{
CALL_SUBTEST(test_basic_eventcount());
CALL_SUBTEST(test_stress_eventcount());
}

View File

@@ -0,0 +1,77 @@
#include "main.h"
#include <exception> // std::exception
#include <unsupported/Eigen/CXX11/Tensor>
struct Foo
{
static Index object_count;
static Index object_limit;
EIGEN_ALIGN_TO_BOUNDARY(128) int dummy;
Foo(int x=0) : dummy(x)
{
#ifdef EIGEN_EXCEPTIONS
// TODO: Is this the correct way to handle this?
if (Foo::object_count > Foo::object_limit) { std::cout << "\nThrow!\n"; throw Foo::Fail(); }
#endif
std::cout << '+';
++Foo::object_count;
eigen_assert((internal::UIntPtr(this) & (127)) == 0);
}
Foo(const Foo&)
{
std::cout << 'c';
++Foo::object_count;
eigen_assert((internal::UIntPtr(this) & (127)) == 0);
}
~Foo()
{
std::cout << '~';
--Foo::object_count;
}
class Fail : public std::exception {};
};
Index Foo::object_count = 0;
Index Foo::object_limit = 0;
EIGEN_DECLARE_TEST(cxx11_maxsizevector)
{
typedef MaxSizeVector<Foo> VectorX;
Foo::object_count = 0;
for(int r = 0; r < g_repeat; r++) {
Index rows = internal::random<Index>(3,30);
Foo::object_limit = internal::random<Index>(0, rows - 2);
std::cout << "object_limit = " << Foo::object_limit << std::endl;
bool exception_raised = false;
#ifdef EIGEN_EXCEPTIONS
try
{
#endif
std::cout << "\nVectorX m(" << rows << ");\n";
VectorX vect(rows);
for(int i=0; i<rows; ++i)
vect.push_back(Foo());
#ifdef EIGEN_EXCEPTIONS
VERIFY(false); // not reached if exceptions are enabled
}
catch (const Foo::Fail&) { exception_raised = true; }
VERIFY(exception_raised);
#endif
VERIFY_IS_EQUAL(Index(0), Foo::object_count);
{
Foo::object_limit = rows+1;
VectorX vect2(rows, Foo());
VERIFY_IS_EQUAL(Foo::object_count, rows);
}
VERIFY_IS_EQUAL(Index(0), Foo::object_count);
std::cout << '\n';
}
}

View File

@@ -0,0 +1,357 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <array>
#include <Eigen/CXX11/src/util/CXX11Meta.h>
using Eigen::internal::is_same;
using Eigen::internal::type_list;
using Eigen::internal::numeric_list;
using Eigen::internal::gen_numeric_list;
using Eigen::internal::gen_numeric_list_reversed;
using Eigen::internal::gen_numeric_list_swapped_pair;
using Eigen::internal::gen_numeric_list_repeated;
using Eigen::internal::concat;
using Eigen::internal::mconcat;
using Eigen::internal::take;
using Eigen::internal::skip;
using Eigen::internal::slice;
using Eigen::internal::get;
using Eigen::internal::id_numeric;
using Eigen::internal::id_type;
using Eigen::internal::is_same_gf;
using Eigen::internal::apply_op_from_left;
using Eigen::internal::apply_op_from_right;
using Eigen::internal::contained_in_list;
using Eigen::internal::contained_in_list_gf;
using Eigen::internal::arg_prod;
using Eigen::internal::arg_sum;
using Eigen::internal::sum_op;
using Eigen::internal::product_op;
using Eigen::internal::array_reverse;
using Eigen::internal::array_sum;
using Eigen::internal::array_prod;
using Eigen::internal::array_reduce;
using Eigen::internal::array_zip;
using Eigen::internal::array_zip_and_reduce;
using Eigen::internal::array_apply;
using Eigen::internal::array_apply_and_reduce;
using Eigen::internal::repeat;
using Eigen::internal::instantiate_by_c_array;
struct dummy_a {};
struct dummy_b {};
struct dummy_c {};
struct dummy_d {};
struct dummy_e {};
// dummy operation for testing apply
template<typename A, typename B> struct dummy_op;
template<> struct dummy_op<dummy_a, dummy_b> { typedef dummy_c type; };
template<> struct dummy_op<dummy_b, dummy_a> { typedef dummy_d type; };
template<> struct dummy_op<dummy_b, dummy_c> { typedef dummy_a type; };
template<> struct dummy_op<dummy_c, dummy_b> { typedef dummy_d type; };
template<> struct dummy_op<dummy_c, dummy_a> { typedef dummy_b type; };
template<> struct dummy_op<dummy_a, dummy_c> { typedef dummy_d type; };
template<> struct dummy_op<dummy_a, dummy_a> { typedef dummy_e type; };
template<> struct dummy_op<dummy_b, dummy_b> { typedef dummy_e type; };
template<> struct dummy_op<dummy_c, dummy_c> { typedef dummy_e type; };
template<typename A, typename B> struct dummy_test { constexpr static bool value = false; constexpr static int global_flags = 0; };
template<> struct dummy_test<dummy_a, dummy_a> { constexpr static bool value = true; constexpr static int global_flags = 1; };
template<> struct dummy_test<dummy_b, dummy_b> { constexpr static bool value = true; constexpr static int global_flags = 2; };
template<> struct dummy_test<dummy_c, dummy_c> { constexpr static bool value = true; constexpr static int global_flags = 4; };
struct times2_op { template<typename A> static A run(A v) { return v * 2; } };
struct dummy_inst
{
int c;
dummy_inst() : c(0) {}
explicit dummy_inst(int) : c(1) {}
dummy_inst(int, int) : c(2) {}
dummy_inst(int, int, int) : c(3) {}
dummy_inst(int, int, int, int) : c(4) {}
dummy_inst(int, int, int, int, int) : c(5) {}
};
static void test_gen_numeric_list()
{
VERIFY((is_same<typename gen_numeric_list<int, 0>::type, numeric_list<int>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 1>::type, numeric_list<int, 0>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 2>::type, numeric_list<int, 0, 1>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 5>::type, numeric_list<int, 0, 1, 2, 3, 4>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 10>::type, numeric_list<int, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 0, 42>::type, numeric_list<int>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 1, 42>::type, numeric_list<int, 42>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 2, 42>::type, numeric_list<int, 42, 43>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 5, 42>::type, numeric_list<int, 42, 43, 44, 45, 46>>::value));
VERIFY((is_same<typename gen_numeric_list<int, 10, 42>::type, numeric_list<int, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 0>::type, numeric_list<int>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 1>::type, numeric_list<int, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 2>::type, numeric_list<int, 1, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 5>::type, numeric_list<int, 4, 3, 2, 1, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 10>::type, numeric_list<int, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 0, 42>::type, numeric_list<int>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 1, 42>::type, numeric_list<int, 42>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 2, 42>::type, numeric_list<int, 43, 42>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 5, 42>::type, numeric_list<int, 46, 45, 44, 43, 42>>::value));
VERIFY((is_same<typename gen_numeric_list_reversed<int, 10, 42>::type, numeric_list<int, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 0, 2, 3>::type, numeric_list<int>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 1, 2, 3>::type, numeric_list<int, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 2, 2, 3>::type, numeric_list<int, 0, 1>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 5, 2, 3>::type, numeric_list<int, 0, 1, 3, 2, 4>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 10, 2, 3>::type, numeric_list<int, 0, 1, 3, 2, 4, 5, 6, 7, 8, 9>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 0, 44, 45, 42>::type, numeric_list<int>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 1, 44, 45, 42>::type, numeric_list<int, 42>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 2, 44, 45, 42>::type, numeric_list<int, 42, 43>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 5, 44, 45, 42>::type, numeric_list<int, 42, 43, 45, 44, 46>>::value));
VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 10, 44, 45, 42>::type, numeric_list<int, 42, 43, 45, 44, 46, 47, 48, 49, 50, 51>>::value));
VERIFY((is_same<typename gen_numeric_list_repeated<int, 0, 0>::type, numeric_list<int>>::value));
VERIFY((is_same<typename gen_numeric_list_repeated<int, 1, 0>::type, numeric_list<int, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_repeated<int, 2, 0>::type, numeric_list<int, 0, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_repeated<int, 5, 0>::type, numeric_list<int, 0, 0, 0, 0, 0>>::value));
VERIFY((is_same<typename gen_numeric_list_repeated<int, 10, 0>::type, numeric_list<int, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>>::value));
}
static void test_concat()
{
VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<>>::type, type_list<dummy_a, dummy_a>>::value));
VERIFY((is_same<typename concat<type_list<>, type_list<dummy_a, dummy_a>>::type, type_list<dummy_a, dummy_a>>::value));
VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<dummy_a, dummy_a>>::type, type_list<dummy_a, dummy_a, dummy_a, dummy_a>>::value));
VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_c>>::value));
VERIFY((is_same<typename concat<type_list<dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int>>::type, numeric_list<int, 0, 0>>::value));
VERIFY((is_same<typename concat<numeric_list<int>, numeric_list<int, 0, 0>>::type, numeric_list<int, 0, 0>>::value));
VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int, 0, 0>>::type, numeric_list<int, 0, 0, 0, 0>>::value));
VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 0, 1, 2>>::value));
VERIFY((is_same<typename concat<numeric_list<int, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
VERIFY((is_same<typename mconcat<type_list<dummy_a>>::type, type_list<dummy_a>>::value));
VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b>>::type, type_list<dummy_a, dummy_b>>::value));
VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b>, type_list<dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
VERIFY((is_same<typename mconcat<type_list<dummy_a, dummy_b>, type_list<dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
VERIFY((is_same<typename mconcat<numeric_list<int, 0>>::type, numeric_list<int, 0>>::value));
VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1>>::type, numeric_list<int, 0, 1>>::value));
VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1>, numeric_list<int, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
VERIFY((is_same<typename mconcat<numeric_list<int, 0, 1>, numeric_list<int, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
}
static void test_slice()
{
typedef type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c> tl;
typedef numeric_list<int, 0, 1, 2, 3, 4, 5> il;
VERIFY((is_same<typename take<0, tl>::type, type_list<>>::value));
VERIFY((is_same<typename take<1, tl>::type, type_list<dummy_a>>::value));
VERIFY((is_same<typename take<2, tl>::type, type_list<dummy_a, dummy_a>>::value));
VERIFY((is_same<typename take<3, tl>::type, type_list<dummy_a, dummy_a, dummy_b>>::value));
VERIFY((is_same<typename take<4, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b>>::value));
VERIFY((is_same<typename take<5, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c>>::value));
VERIFY((is_same<typename take<6, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value));
VERIFY((is_same<typename take<0, il>::type, numeric_list<int>>::value));
VERIFY((is_same<typename take<1, il>::type, numeric_list<int, 0>>::value));
VERIFY((is_same<typename take<2, il>::type, numeric_list<int, 0, 1>>::value));
VERIFY((is_same<typename take<3, il>::type, numeric_list<int, 0, 1, 2>>::value));
VERIFY((is_same<typename take<4, il>::type, numeric_list<int, 0, 1, 2, 3>>::value));
VERIFY((is_same<typename take<5, il>::type, numeric_list<int, 0, 1, 2, 3, 4>>::value));
VERIFY((is_same<typename take<6, il>::type, numeric_list<int, 0, 1, 2, 3, 4, 5>>::value));
VERIFY((is_same<typename skip<0, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value));
VERIFY((is_same<typename skip<1, tl>::type, type_list<dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value));
VERIFY((is_same<typename skip<2, tl>::type, type_list<dummy_b, dummy_b, dummy_c, dummy_c>>::value));
VERIFY((is_same<typename skip<3, tl>::type, type_list<dummy_b, dummy_c, dummy_c>>::value));
VERIFY((is_same<typename skip<4, tl>::type, type_list<dummy_c, dummy_c>>::value));
VERIFY((is_same<typename skip<5, tl>::type, type_list<dummy_c>>::value));
VERIFY((is_same<typename skip<6, tl>::type, type_list<>>::value));
VERIFY((is_same<typename skip<0, il>::type, numeric_list<int, 0, 1, 2, 3, 4, 5>>::value));
VERIFY((is_same<typename skip<1, il>::type, numeric_list<int, 1, 2, 3, 4, 5>>::value));
VERIFY((is_same<typename skip<2, il>::type, numeric_list<int, 2, 3, 4, 5>>::value));
VERIFY((is_same<typename skip<3, il>::type, numeric_list<int, 3, 4, 5>>::value));
VERIFY((is_same<typename skip<4, il>::type, numeric_list<int, 4, 5>>::value));
VERIFY((is_same<typename skip<5, il>::type, numeric_list<int, 5>>::value));
VERIFY((is_same<typename skip<6, il>::type, numeric_list<int>>::value));
VERIFY((is_same<typename slice<0, 3, tl>::type, typename take<3, tl>::type>::value));
VERIFY((is_same<typename slice<0, 3, il>::type, typename take<3, il>::type>::value));
VERIFY((is_same<typename slice<1, 3, tl>::type, type_list<dummy_a, dummy_b, dummy_b>>::value));
VERIFY((is_same<typename slice<1, 3, il>::type, numeric_list<int, 1, 2, 3>>::value));
}
static void test_get()
{
typedef type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c> tl;
typedef numeric_list<int, 4, 8, 15, 16, 23, 42> il;
VERIFY((is_same<typename get<0, tl>::type, dummy_a>::value));
VERIFY((is_same<typename get<1, tl>::type, dummy_a>::value));
VERIFY((is_same<typename get<2, tl>::type, dummy_b>::value));
VERIFY((is_same<typename get<3, tl>::type, dummy_b>::value));
VERIFY((is_same<typename get<4, tl>::type, dummy_c>::value));
VERIFY((is_same<typename get<5, tl>::type, dummy_c>::value));
VERIFY_IS_EQUAL(((int)get<0, il>::value), 4);
VERIFY_IS_EQUAL(((int)get<1, il>::value), 8);
VERIFY_IS_EQUAL(((int)get<2, il>::value), 15);
VERIFY_IS_EQUAL(((int)get<3, il>::value), 16);
VERIFY_IS_EQUAL(((int)get<4, il>::value), 23);
VERIFY_IS_EQUAL(((int)get<5, il>::value), 42);
}
static void test_id_helper(dummy_a a, dummy_a b, dummy_a c)
{
(void)a;
(void)b;
(void)c;
}
template<int... ii>
static void test_id_numeric()
{
test_id_helper(typename id_numeric<int, ii, dummy_a>::type()...);
}
template<typename... tt>
static void test_id_type()
{
test_id_helper(typename id_type<tt, dummy_a>::type()...);
}
static void test_id()
{
// don't call VERIFY here, just assume it works if it compiles
// (otherwise it will complain that it can't find the function)
test_id_numeric<1, 4, 6>();
test_id_type<dummy_a, dummy_b, dummy_c>();
}
static void test_is_same_gf()
{
VERIFY((!is_same_gf<dummy_a, dummy_b>::value));
VERIFY((!!is_same_gf<dummy_a, dummy_a>::value));
VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_b>::global_flags), false);
VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_a>::global_flags), false);
}
static void test_apply_op()
{
typedef type_list<dummy_a, dummy_b, dummy_c> tl;
VERIFY((!!is_same<typename apply_op_from_left<dummy_op, dummy_a, tl>::type, type_list<dummy_e, dummy_c, dummy_d>>::value));
VERIFY((!!is_same<typename apply_op_from_right<dummy_op, dummy_a, tl>::type, type_list<dummy_e, dummy_d, dummy_b>>::value));
}
static void test_contained_in_list()
{
typedef type_list<dummy_a, dummy_b, dummy_c> tl;
VERIFY((!!contained_in_list<is_same, dummy_a, tl>::value));
VERIFY((!!contained_in_list<is_same, dummy_b, tl>::value));
VERIFY((!!contained_in_list<is_same, dummy_c, tl>::value));
VERIFY((!contained_in_list<is_same, dummy_d, tl>::value));
VERIFY((!contained_in_list<is_same, dummy_e, tl>::value));
VERIFY((!!contained_in_list_gf<dummy_test, dummy_a, tl>::value));
VERIFY((!!contained_in_list_gf<dummy_test, dummy_b, tl>::value));
VERIFY((!!contained_in_list_gf<dummy_test, dummy_c, tl>::value));
VERIFY((!contained_in_list_gf<dummy_test, dummy_d, tl>::value));
VERIFY((!contained_in_list_gf<dummy_test, dummy_e, tl>::value));
VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_a, tl>::global_flags), 1);
VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_b, tl>::global_flags), 2);
VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_c, tl>::global_flags), 4);
VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_d, tl>::global_flags), 0);
VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_e, tl>::global_flags), 0);
}
static void test_arg_reductions()
{
VERIFY_IS_EQUAL(arg_sum(1,2,3,4), 10);
VERIFY_IS_EQUAL(arg_prod(1,2,3,4), 24);
VERIFY_IS_APPROX(arg_sum(0.5, 2, 5), 7.5);
VERIFY_IS_APPROX(arg_prod(0.5, 2, 5), 5.0);
}
static void test_array_reverse_and_reduce()
{
array<int, 6> a{{4, 8, 15, 16, 23, 42}};
array<int, 6> b{{42, 23, 16, 15, 8, 4}};
// there is no operator<< for std::array, so VERIFY_IS_EQUAL will
// not compile
VERIFY((array_reverse(a) == b));
VERIFY((array_reverse(b) == a));
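  // Sanity of the expected reductions: 4+8+15+16+23+42 = 108 and
  // 4*8*15*16*23*42 = 7418880.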
VERIFY_IS_EQUAL((array_sum(a)), 108);
VERIFY_IS_EQUAL((array_sum(b)), 108);
VERIFY_IS_EQUAL((array_prod(a)), 7418880);
VERIFY_IS_EQUAL((array_prod(b)), 7418880);
}
static void test_array_zip_and_apply()
{
array<int, 6> a{{4, 8, 15, 16, 23, 42}};
array<int, 6> b{{0, 1, 2, 3, 4, 5}};
array<int, 6> c{{4, 9, 17, 19, 27, 47}};
array<int, 6> d{{0, 8, 30, 48, 92, 210}};
array<int, 6> e{{0, 2, 4, 6, 8, 10}};
VERIFY((array_zip<sum_op>(a, b) == c));
VERIFY((array_zip<product_op>(a, b) == d));
VERIFY((array_apply<times2_op>(b) == e));
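  // apply_and_reduce<sum_op, times2_op> doubles every entry and then sums:
  // 2*108 = 216 for a and 2*15 = 30 for b. zip_and_reduce<product_op, sum_op>
  // multiplies the pairwise sums (4*9*17*19*27*47 = 14755932), while
  // <sum_op, product_op> sums the pairwise products (0+8+30+48+92+210 = 388).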
VERIFY_IS_EQUAL((array_apply_and_reduce<sum_op, times2_op>(a)), 216);
VERIFY_IS_EQUAL((array_apply_and_reduce<sum_op, times2_op>(b)), 30);
VERIFY_IS_EQUAL((array_zip_and_reduce<product_op, sum_op>(a, b)), 14755932);
VERIFY_IS_EQUAL((array_zip_and_reduce<sum_op, product_op>(a, b)), 388);
}
static void test_array_misc()
{
array<int, 3> a3{{1, 1, 1}};
array<int, 6> a6{{2, 2, 2, 2, 2, 2}};
VERIFY((repeat<3, int>(1) == a3));
VERIFY((repeat<6, int>(2) == a6));
int data[5] = { 0, 1, 2, 3, 4 };
VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 0>(data).c), 0);
VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 1>(data).c), 1);
VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 2>(data).c), 2);
VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 3>(data).c), 3);
VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 4>(data).c), 4);
VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 5>(data).c), 5);
}
EIGEN_DECLARE_TEST(cxx11_meta)
{
CALL_SUBTEST(test_gen_numeric_list());
CALL_SUBTEST(test_concat());
CALL_SUBTEST(test_slice());
CALL_SUBTEST(test_get());
CALL_SUBTEST(test_id());
CALL_SUBTEST(test_is_same_gf());
CALL_SUBTEST(test_apply_op());
CALL_SUBTEST(test_contained_in_list());
CALL_SUBTEST(test_arg_reductions());
CALL_SUBTEST(test_array_reverse_and_reduce());
CALL_SUBTEST(test_array_zip_and_apply());
CALL_SUBTEST(test_array_misc());
}

View File

@@ -0,0 +1,180 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include "main.h"
#include "Eigen/CXX11/ThreadPool"
#include "Eigen/CXX11/Tensor"
static void test_create_destroy_empty_pool()
{
  // Just create and destroy the pool. This will spin up and tear down worker
// threads. Ensure there are no issues in that logic.
for (int i = 0; i < 16; ++i) {
ThreadPool tp(i);
}
}
static void test_parallelism(bool allow_spinning)
{
  // Test that we never fail to match available tasks with idle threads.
const int kThreads = 16; // code below expects that this is a multiple of 4
ThreadPool tp(kThreads, allow_spinning);
VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
for (int iter = 0; iter < 100; ++iter) {
std::atomic<int> running(0);
std::atomic<int> done(0);
std::atomic<int> phase(0);
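    // The tasks synchronize purely through these atomics: each task busy-waits
    // on 'phase', so all tasks of a wave are demonstrably running at once.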
// Schedule kThreads tasks and ensure that they all are running.
for (int i = 0; i < kThreads; ++i) {
tp.Schedule([&]() {
const int thread_id = tp.CurrentThreadId();
VERIFY_GE(thread_id, 0);
VERIFY_LE(thread_id, kThreads - 1);
running++;
while (phase < 1) {
}
done++;
});
}
while (running != kThreads) {
}
running = 0;
phase = 1;
// Now, while the previous tasks exit, schedule another kThreads tasks and
// ensure that they are running.
for (int i = 0; i < kThreads; ++i) {
tp.Schedule([&, i]() {
running++;
while (phase < 2) {
}
        // When all tasks are running, half of the tasks exit, a quarter keep
        // running, and a quarter schedule another 2 tasks each. Concurrently,
        // the main thread schedules another quarter of the tasks. This gives
        // us another kThreads tasks, and we ensure that they all are running.
if (i < kThreads / 2) {
} else if (i < 3 * kThreads / 4) {
running++;
while (phase < 3) {
}
done++;
} else {
for (int j = 0; j < 2; ++j) {
tp.Schedule([&]() {
running++;
while (phase < 3) {
}
done++;
});
}
}
done++;
});
}
while (running != kThreads) {
}
running = 0;
phase = 2;
for (int i = 0; i < kThreads / 4; ++i) {
tp.Schedule([&]() {
running++;
while (phase < 3) {
}
done++;
});
}
while (running != kThreads) {
}
phase = 3;
while (done != 3 * kThreads) {
}
}
}
static void test_cancel()
{
ThreadPool tp(2);
  // Schedule a large number of closures that each sleep for two seconds. This
  // will keep the thread pool busy for much longer than the default test timeout.
for (int i = 0; i < 1000; ++i) {
tp.Schedule([]() {
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
});
}
// Cancel the processing of all the closures that are still pending.
tp.Cancel();
}
static void test_pool_partitions() {
const int kThreads = 2;
ThreadPool tp(kThreads);
// Assign each thread to its own partition, so that stealing other work only
// occurs globally when a thread is idle.
std::vector<std::pair<unsigned, unsigned>> steal_partitions(kThreads);
for (int i = 0; i < kThreads; ++i) {
steal_partitions[i] = std::make_pair(i, i + 1);
}
tp.SetStealPartitions(steal_partitions);
std::atomic<int> running(0);
std::atomic<int> done(0);
std::atomic<int> phase(0);
// Schedule kThreads tasks and ensure that they all are running.
for (int i = 0; i < kThreads; ++i) {
tp.Schedule([&]() {
const int thread_id = tp.CurrentThreadId();
VERIFY_GE(thread_id, 0);
VERIFY_LE(thread_id, kThreads - 1);
++running;
while (phase < 1) {
}
++done;
});
}
while (running != kThreads) {
}
// Schedule each closure to only run on thread 'i' and verify that it does.
for (int i = 0; i < kThreads; ++i) {
tp.ScheduleWithHint(
[&, i]() {
++running;
const int thread_id = tp.CurrentThreadId();
VERIFY_IS_EQUAL(thread_id, i);
while (phase < 2) {
}
++done;
},
i, i + 1);
}
running = 0;
phase = 1;
while (running != kThreads) {
}
running = 0;
phase = 2;
}
EIGEN_DECLARE_TEST(cxx11_non_blocking_thread_pool)
{
CALL_SUBTEST(test_create_destroy_empty_pool());
CALL_SUBTEST(test_parallelism(true));
CALL_SUBTEST(test_parallelism(false));
CALL_SUBTEST(test_cancel());
CALL_SUBTEST(test_pool_partitions());
}

View File

@@ -0,0 +1,235 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include <cstdlib>
#include "main.h"
#include <Eigen/CXX11/ThreadPool>
// Visual studio doesn't implement a rand_r() function since its
// implementation of rand() is already thread safe
int rand_reentrant(unsigned int* s) {
#if EIGEN_COMP_MSVC_STRICT  // always defined (to 0 on non-MSVC), so test its value
EIGEN_UNUSED_VARIABLE(s);
return rand();
#else
return rand_r(s);
#endif
}
void test_basic_runqueue()
{
RunQueue<int, 4> q;
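  // RunQueue conventions: PopFront()/PopBackHalf() yield a default-constructed
  // element (0 for int) when their side is empty, and PushFront()/PushBack()
  // return 0 on success or hand the rejected element back when the queue is full.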
// Check empty state.
VERIFY(q.Empty());
VERIFY_IS_EQUAL(0u, q.Size());
VERIFY_IS_EQUAL(0, q.PopFront());
std::vector<int> stolen;
VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(0u, stolen.size());
// Push one front, pop one front.
VERIFY_IS_EQUAL(0, q.PushFront(1));
VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(1, q.PopFront());
VERIFY_IS_EQUAL(0u, q.Size());
// Push front to overflow.
VERIFY_IS_EQUAL(0, q.PushFront(2));
VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(3));
VERIFY_IS_EQUAL(2u, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(4));
VERIFY_IS_EQUAL(3u, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(5));
VERIFY_IS_EQUAL(4u, q.Size());
VERIFY_IS_EQUAL(6, q.PushFront(6));
VERIFY_IS_EQUAL(4u, q.Size());
VERIFY_IS_EQUAL(5, q.PopFront());
VERIFY_IS_EQUAL(3u, q.Size());
VERIFY_IS_EQUAL(4, q.PopFront());
VERIFY_IS_EQUAL(2u, q.Size());
VERIFY_IS_EQUAL(3, q.PopFront());
VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(2, q.PopFront());
VERIFY_IS_EQUAL(0u, q.Size());
VERIFY_IS_EQUAL(0, q.PopFront());
// Push one back, pop one back.
VERIFY_IS_EQUAL(0, q.PushBack(7));
VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(1u, stolen.size());
VERIFY_IS_EQUAL(7, stolen[0]);
VERIFY_IS_EQUAL(0u, q.Size());
stolen.clear();
// Push back to overflow.
VERIFY_IS_EQUAL(0, q.PushBack(8));
VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(9));
VERIFY_IS_EQUAL(2u, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(10));
VERIFY_IS_EQUAL(3u, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(11));
VERIFY_IS_EQUAL(4u, q.Size());
VERIFY_IS_EQUAL(12, q.PushBack(12));
VERIFY_IS_EQUAL(4u, q.Size());
// Pop back in halves.
VERIFY_IS_EQUAL(2u, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(2u, stolen.size());
VERIFY_IS_EQUAL(10, stolen[0]);
VERIFY_IS_EQUAL(11, stolen[1]);
VERIFY_IS_EQUAL(2u, q.Size());
stolen.clear();
VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(1u, stolen.size());
VERIFY_IS_EQUAL(9, stolen[0]);
VERIFY_IS_EQUAL(1u, q.Size());
stolen.clear();
VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(1u, stolen.size());
VERIFY_IS_EQUAL(8, stolen[0]);
stolen.clear();
VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(0u, stolen.size());
// Empty again.
VERIFY(q.Empty());
VERIFY_IS_EQUAL(0u, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(1));
VERIFY_IS_EQUAL(0, q.PushFront(2));
VERIFY_IS_EQUAL(0, q.PushFront(3));
VERIFY_IS_EQUAL(1, q.PopBack());
VERIFY_IS_EQUAL(2, q.PopBack());
VERIFY_IS_EQUAL(3, q.PopBack());
VERIFY(q.Empty());
VERIFY_IS_EQUAL(0u, q.Size());
}
// The 'empty' test checks that the queue is never claimed to be empty when it
// is in fact not. The emptiness property is a crucial part of the thread pool
// blocking scheme, so we go to great effort to ensure it holds. We create a
// queue with 1 element, then repeatedly push 1 element (either front or back
// at random) and pop 1 element (either front or back at random). The queue
// thus always contains at least 1 element, but otherwise changes chaotically.
// Another thread constantly checks that the queue is not claimed to be empty.
void test_empty_runqueue()
{
RunQueue<int, 4> q;
q.PushFront(1);
std::atomic<bool> done(false);
std::thread mutator([&q, &done]() {
unsigned rnd = 0;
std::vector<int> stolen;
for (int i = 0; i < 1 << 18; i++) {
if (rand_reentrant(&rnd) % 2)
VERIFY_IS_EQUAL(0, q.PushFront(1));
else
VERIFY_IS_EQUAL(0, q.PushBack(1));
if (rand_reentrant(&rnd) % 2)
VERIFY_IS_EQUAL(1, q.PopFront());
else {
for (;;) {
if (q.PopBackHalf(&stolen) == 1) {
stolen.clear();
break;
}
VERIFY_IS_EQUAL(0u, stolen.size());
}
}
}
done = true;
});
while (!done) {
VERIFY(!q.Empty());
int size = q.Size();
VERIFY_GE(size, 1);
VERIFY_LE(size, 2);
}
VERIFY_IS_EQUAL(1, q.PopFront());
mutator.join();
}
// Stress is a chaotic random test.
// One thread (the owner) calls PushFront/PopFront, other threads call
// PushBack/PopBackHalf. Ensure that we don't crash or deadlock, and that all
// sanity checks pass.
void test_stress_runqueue()
{
static const int kEvents = 1 << 18;
RunQueue<int, 8> q;
std::atomic<int> total(0);
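  // Each pushed value is added to a thread-local sum and each popped value is
  // subtracted; a final balance of zero proves no element was lost or duplicated.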
std::vector<std::unique_ptr<std::thread>> threads;
threads.emplace_back(new std::thread([&q, &total]() {
int sum = 0;
int pushed = 1;
int popped = 1;
while (pushed < kEvents || popped < kEvents) {
if (pushed < kEvents) {
if (q.PushFront(pushed) == 0) {
sum += pushed;
pushed++;
}
}
if (popped < kEvents) {
int v = q.PopFront();
if (v != 0) {
sum -= v;
popped++;
}
}
}
total += sum;
}));
for (int i = 0; i < 2; i++) {
threads.emplace_back(new std::thread([&q, &total]() {
int sum = 0;
for (int j = 1; j < kEvents; j++) {
if (q.PushBack(j) == 0) {
sum += j;
continue;
}
EIGEN_THREAD_YIELD();
j--;
}
total += sum;
}));
threads.emplace_back(new std::thread([&q, &total]() {
int sum = 0;
std::vector<int> stolen;
for (int j = 1; j < kEvents;) {
if (q.PopBackHalf(&stolen) == 0) {
EIGEN_THREAD_YIELD();
continue;
}
while (stolen.size() && j < kEvents) {
int v = stolen.back();
stolen.pop_back();
VERIFY_IS_NOT_EQUAL(v, 0);
sum += v;
j++;
}
}
while (stolen.size()) {
int v = stolen.back();
stolen.pop_back();
VERIFY_IS_NOT_EQUAL(v, 0);
while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD();
}
total -= sum;
}));
}
for (size_t i = 0; i < threads.size(); i++) threads[i]->join();
VERIFY(q.Empty());
VERIFY(total.load() == 0);
}
EIGEN_DECLARE_TEST(cxx11_runqueue)
{
CALL_SUBTEST_1(test_basic_runqueue());
CALL_SUBTEST_2(test_empty_runqueue());
CALL_SUBTEST_3(test_stress_runqueue());
}

View File

@@ -0,0 +1,294 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Eugene Brevdo <ebrevdo@google.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::array;
using Eigen::Tuple;
template <int DataLayout>
static void test_simple_index_tuples()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
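  // index_tuples() pairs each coefficient with its linear index, so element n
  // of the result is the tuple (n, tensor(n)).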
for (DenseIndex n = 0; n < 2*3*5*7; ++n) {
const Tuple<DenseIndex, float>& v = index_tuples.coeff(n);
VERIFY_IS_EQUAL(v.first, n);
VERIFY_IS_EQUAL(v.second, tensor.coeff(n));
}
}
template <int DataLayout>
static void test_index_tuples_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) {
    const Tuple<DenseIndex, float>& v = index_tuples(n);  // linear index, equivalent to (i, j, k, l)
VERIFY_IS_EQUAL(v.first, n);
VERIFY_IS_EQUAL(v.second, tensor(n));
}
}
template <int DataLayout>
static void test_argmax_tuple_reducer()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
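  // DimensionList enumerates all four dimensions, so this reduction collapses
  // the whole tensor into a single (index, value) tuple holding the maximum.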
reduced = index_tuples.reduce(
dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 0, DataLayout> maxi = tensor.maximum();
VERIFY_IS_EQUAL(maxi(), reduced(0).second);
array<DenseIndex, 3> reduce_dims;
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(max_by_dims(l), reduced_by_dims(l).second);
}
}
template <int DataLayout>
static void test_argmin_tuple_reducer()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce(
dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 0, DataLayout> mini = tensor.minimum();
VERIFY_IS_EQUAL(mini(), reduced(0).second);
array<DenseIndex, 3> reduce_dims;
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(min_by_dims(l), reduced_by_dims(l).second);
}
}
template <int DataLayout>
static void test_simple_argmax()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
tensor(0,0,0,0) = 10.0;
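  // Plant a distinctive large value at the first coefficient: argmax should
  // then report linear index 0.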
Tensor<DenseIndex, 0, DataLayout> tensor_argmax;
tensor_argmax = tensor.argmax();
VERIFY_IS_EQUAL(tensor_argmax(0), 0);
tensor(1,2,4,6) = 20.0;
tensor_argmax = tensor.argmax();
VERIFY_IS_EQUAL(tensor_argmax(0), 2*3*5*7 - 1);
}
template <int DataLayout>
static void test_simple_argmin()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
tensor(0,0,0,0) = -10.0;
Tensor<DenseIndex, 0, DataLayout> tensor_argmin;
tensor_argmin = tensor.argmin();
VERIFY_IS_EQUAL(tensor_argmin(0), 0);
tensor(1,2,4,6) = -20.0;
tensor_argmin = tensor.argmin();
VERIFY_IS_EQUAL(tensor_argmin(0), 2*3*5*7 - 1);
}
template <int DataLayout>
static void test_argmax_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
std::vector<int> dims {2, 3, 5, 7};
for (int dim = 0; dim < 4; ++dim) {
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<DenseIndex, 3, DataLayout> tensor_argmax;
array<DenseIndex, 4> ix;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != 0) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0
tensor(ix) = 10.0;
}
}
}
}
tensor_argmax = tensor.argmax(dim);
VERIFY_IS_EQUAL(tensor_argmax.size(),
ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
for (ptrdiff_t n = 0; n < tensor_argmax.size(); ++n) {
// Expect max to be in the first index of the reduced dimension
VERIFY_IS_EQUAL(tensor_argmax.data()[n], 0);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != tensor.dimension(dim) - 1) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
tensor(ix) = 20.0;
}
}
}
}
tensor_argmax = tensor.argmax(dim);
VERIFY_IS_EQUAL(tensor_argmax.size(),
ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
for (ptrdiff_t n = 0; n < tensor_argmax.size(); ++n) {
// Expect max to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_argmax.data()[n], tensor.dimension(dim) - 1);
}
}
}
template <int DataLayout>
static void test_argmin_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
std::vector<int> dims {2, 3, 5, 7};
for (int dim = 0; dim < 4; ++dim) {
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<DenseIndex, 3, DataLayout> tensor_argmin;
array<DenseIndex, 4> ix;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != 0) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
tensor(ix) = -10.0;
}
}
}
}
tensor_argmin = tensor.argmin(dim);
VERIFY_IS_EQUAL(tensor_argmin.size(),
ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
for (ptrdiff_t n = 0; n < tensor_argmin.size(); ++n) {
// Expect min to be in the first index of the reduced dimension
VERIFY_IS_EQUAL(tensor_argmin.data()[n], 0);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != tensor.dimension(dim) - 1) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
tensor(ix) = -20.0;
}
}
}
}
tensor_argmin = tensor.argmin(dim);
VERIFY_IS_EQUAL(tensor_argmin.size(),
ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
for (ptrdiff_t n = 0; n < tensor_argmin.size(); ++n) {
// Expect min to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_argmin.data()[n], tensor.dimension(dim) - 1);
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_argmax)
{
CALL_SUBTEST(test_simple_index_tuples<RowMajor>());
CALL_SUBTEST(test_simple_index_tuples<ColMajor>());
CALL_SUBTEST(test_index_tuples_dim<RowMajor>());
CALL_SUBTEST(test_index_tuples_dim<ColMajor>());
CALL_SUBTEST(test_argmax_tuple_reducer<RowMajor>());
CALL_SUBTEST(test_argmax_tuple_reducer<ColMajor>());
CALL_SUBTEST(test_argmin_tuple_reducer<RowMajor>());
CALL_SUBTEST(test_argmin_tuple_reducer<ColMajor>());
CALL_SUBTEST(test_simple_argmax<RowMajor>());
CALL_SUBTEST(test_simple_argmax<ColMajor>());
CALL_SUBTEST(test_simple_argmin<RowMajor>());
CALL_SUBTEST(test_simple_argmin<ColMajor>());
CALL_SUBTEST(test_argmax_dim<RowMajor>());
CALL_SUBTEST(test_argmax_dim<ColMajor>());
CALL_SUBTEST(test_argmin_dim<RowMajor>());
CALL_SUBTEST(test_argmin_dim<ColMajor>());
}

View File

@@ -0,0 +1,253 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
using Eigen::Tensor;
template <int Layout>
void test_gpu_simple_argmax()
{
Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97));
Tensor<DenseIndex, 1, Layout> out_max(Eigen::array<DenseIndex, 1>(1));
Tensor<DenseIndex, 1, Layout> out_min(Eigen::array<DenseIndex, 1>(1));
in.setRandom();
in *= in.constant(100.0);
in(0, 0, 0) = -1000.0;
in(71, 52, 96) = 1000.0;
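  // With the extrema planted at the first and last coefficients, argmax must
  // return the last linear index and argmin the first.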
std::size_t in_bytes = in.size() * sizeof(double);
std::size_t out_bytes = out_max.size() * sizeof(DenseIndex);
double* d_in;
DenseIndex* d_out_max;
DenseIndex* d_out_min;
gpuMalloc((void**)(&d_in), in_bytes);
gpuMalloc((void**)(&d_out_max), out_bytes);
gpuMalloc((void**)(&d_out_min), out_bytes);
gpuMemcpy(d_in, in.data(), in_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97));
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_max(d_out_max, Eigen::array<DenseIndex, 1>(1));
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_min(d_out_min, Eigen::array<DenseIndex, 1>(1));
gpu_out_max.device(gpu_device) = gpu_in.argmax();
gpu_out_min.device(gpu_device) = gpu_in.argmin();
assert(gpuMemcpyAsync(out_max.data(), d_out_max, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuMemcpyAsync(out_min.data(), d_out_min, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1);
VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0);
gpuFree(d_in);
gpuFree(d_out_max);
gpuFree(d_out_min);
}
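// Note: the whole-tensor argmax()/argmin() above return a single *linear*
// index expressed in the tensor's own layout. The maximal entry sits at
// coordinates (71, 52, 96), i.e. the last element, so for either layout its
// linear index is
//   72 * 53 * 97 - 1 = 370151,
// which in column-major order is recovered as
//   index = i + 72 * (j + 53 * k) = 71 + 72 * (52 + 53 * 96) = 370151.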
template <int DataLayout>
void test_gpu_argmax_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
std::vector<int> dims;
dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7);
for (int dim = 0; dim < 4; ++dim) {
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
array<DenseIndex, 3> out_shape;
for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d+1];
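// e.g. for dim == 1 the loop above keeps {dims[0], dims[2], dims[3]}, i.e.
// out_shape = {2, 5, 7}: the reduced dimension simply drops out.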
Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape);
array<DenseIndex, 4> ix;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != 0) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0
tensor(ix) = 10.0;
}
}
}
}
std::size_t in_bytes = tensor.size() * sizeof(float);
std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
float* d_in;
DenseIndex* d_out;
gpuMalloc((void**)(&d_in), in_bytes);
gpuMalloc((void**)(&d_out), out_bytes);
gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape);
gpu_out.device(gpu_device) = gpu_in.argmax(dim);
assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(tensor_arg.size(),
size_t(2*3*5*7 / tensor.dimension(dim)));
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect max to be in the first index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != tensor.dimension(dim) - 1) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
tensor(ix) = 20.0;
}
}
}
}
gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
gpu_out.device(gpu_device) = gpu_in.argmax(dim);
assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect max to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
gpuFree(d_in);
gpuFree(d_out);
}
}
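// test_gpu_argmax_dim above and test_gpu_argmin_dim below follow the same
// enqueue-then-synchronize pattern: the device-side assignment and
// gpuMemcpyAsync are queued on gpu_device.stream(), so the host buffers may
// only be read once gpuStreamSynchronize() has returned gpuSuccess. A minimal
// sketch of the pattern (names reused from the tests):
//
//   gpu_out.device(gpu_device) = gpu_in.argmax(dim);          // enqueue kernel
//   gpuMemcpyAsync(host, dev, bytes, gpuMemcpyDeviceToHost,
//                  gpu_device.stream());                      // enqueue copy
//   gpuStreamSynchronize(gpu_device.stream());                // wait, then read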
template <int DataLayout>
void test_gpu_argmin_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
std::vector<int> dims;
dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7);
for (int dim = 0; dim < 4; ++dim) {
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
array<DenseIndex, 3> out_shape;
for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d+1];
Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape);
array<DenseIndex, 4> ix;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != 0) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
tensor(ix) = -10.0;
}
}
}
}
std::size_t in_bytes = tensor.size() * sizeof(float);
std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
float* d_in;
DenseIndex* d_out;
gpuMalloc((void**)(&d_in), in_bytes);
gpuMalloc((void**)(&d_out), out_bytes);
gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape);
gpu_out.device(gpu_device) = gpu_in.argmin(dim);
assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(tensor_arg.size(),
2*3*5*7 / tensor.dimension(dim));
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect min to be in the first index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
if (ix[dim] != tensor.dimension(dim) - 1) continue;
// suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
tensor(ix) = -20.0;
}
}
}
}
gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
gpu_out.device(gpu_device) = gpu_in.argmin(dim);
assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect min to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
gpuFree(d_in);
gpuFree(d_out);
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_argmax_gpu)
{
CALL_SUBTEST_1(test_gpu_simple_argmax<RowMajor>());
CALL_SUBTEST_1(test_gpu_simple_argmax<ColMajor>());
CALL_SUBTEST_2(test_gpu_argmax_dim<RowMajor>());
CALL_SUBTEST_2(test_gpu_argmax_dim<ColMajor>());
CALL_SUBTEST_3(test_gpu_argmin_dim<RowMajor>());
CALL_SUBTEST_3(test_gpu_argmin_dim<ColMajor>());
}

View File

@@ -0,0 +1,258 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
template <typename DataType, int Layout, typename DenseIndex>
static void test_sycl_simple_argmax(const Eigen::SyclDevice& sycl_device) {
Tensor<DataType, 3, Layout, DenseIndex> in(Eigen::array<DenseIndex, 3>{{2, 2, 2}});
Tensor<DenseIndex, 0, Layout, DenseIndex> out_max;
Tensor<DenseIndex, 0, Layout, DenseIndex> out_min;
in.setRandom();
in *= in.constant(100.0);
in(0, 0, 0) = -1000.0;
in(1, 1, 1) = 1000.0;
std::size_t in_bytes = in.size() * sizeof(DataType);
std::size_t out_bytes = out_max.size() * sizeof(DenseIndex);
DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
DenseIndex* d_out_max = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
DenseIndex* d_out_min = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, Layout, DenseIndex> > gpu_in(d_in,
Eigen::array<DenseIndex, 3>{{2, 2, 2}});
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > gpu_out_max(d_out_max);
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > gpu_out_min(d_out_min);
sycl_device.memcpyHostToDevice(d_in, in.data(), in_bytes);
gpu_out_max.device(sycl_device) = gpu_in.argmax();
gpu_out_min.device(sycl_device) = gpu_in.argmin();
sycl_device.memcpyDeviceToHost(out_max.data(), d_out_max, out_bytes);
sycl_device.memcpyDeviceToHost(out_min.data(), d_out_min, out_bytes);
VERIFY_IS_EQUAL(out_max(), 2 * 2 * 2 - 1);
VERIFY_IS_EQUAL(out_min(), 0);
sycl_device.deallocate(d_in);
sycl_device.deallocate(d_out_max);
sycl_device.deallocate(d_out_min);
}
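// A rank-0 Tensor, as used for out_max/out_min above, holds exactly one
// scalar: its size() is 1, which is why out_bytes is sizeof(DenseIndex) and
// out_max() is read with no indices.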
template <typename DataType, int DataLayout, typename DenseIndex>
static void test_sycl_argmax_dim(const Eigen::SyclDevice& sycl_device) {
DenseIndex sizeDim0 = 9;
DenseIndex sizeDim1 = 3;
DenseIndex sizeDim2 = 5;
DenseIndex sizeDim3 = 7;
Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);
std::vector<DenseIndex> dims;
dims.push_back(sizeDim0);
dims.push_back(sizeDim1);
dims.push_back(sizeDim2);
dims.push_back(sizeDim3);
for (DenseIndex dim = 0; dim < 4; ++dim) {
array<DenseIndex, 3> out_shape;
for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(out_shape);
array<DenseIndex, 4> ix;
for (DenseIndex i = 0; i < sizeDim0; ++i) {
for (DenseIndex j = 0; j < sizeDim1; ++j) {
for (DenseIndex k = 0; k < sizeDim2; ++k) {
for (DenseIndex l = 0; l < sizeDim3; ++l) {
ix[0] = i;
ix[1] = j;
ix[2] = k;
ix[3] = l;
// suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l)
// = 10.0
tensor(ix) = (ix[dim] != 0) ? -1.0 : 10.0;
}
}
}
}
std::size_t in_bytes = tensor.size() * sizeof(DataType);
std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, DenseIndex> > gpu_in(
d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout, DenseIndex> > gpu_out(d_out, out_shape);
sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
gpu_out.device(sycl_device) = gpu_in.argmax(dim);
sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect max to be in the first index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
}
sycl_device.synchronize();
for (DenseIndex i = 0; i < sizeDim0; ++i) {
for (DenseIndex j = 0; j < sizeDim1; ++j) {
for (DenseIndex k = 0; k < sizeDim2; ++k) {
for (DenseIndex l = 0; l < sizeDim3; ++l) {
ix[0] = i;
ix[1] = j;
ix[2] = k;
ix[3] = l;
// suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
tensor(ix) = (ix[dim] != tensor.dimension(dim) - 1) ? -1.0 : 20.0;
}
}
}
}
sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
gpu_out.device(sycl_device) = gpu_in.argmax(dim);
sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect max to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
sycl_device.deallocate(d_in);
sycl_device.deallocate(d_out);
}
}
template <typename DataType, int DataLayout, typename DenseIndex>
static void test_sycl_argmin_dim(const Eigen::SyclDevice& sycl_device) {
DenseIndex sizeDim0 = 9;
DenseIndex sizeDim1 = 3;
DenseIndex sizeDim2 = 5;
DenseIndex sizeDim3 = 7;
Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);
std::vector<DenseIndex> dims;
dims.push_back(sizeDim0);
dims.push_back(sizeDim1);
dims.push_back(sizeDim2);
dims.push_back(sizeDim3);
for (DenseIndex dim = 0; dim < 4; ++dim) {
array<DenseIndex, 3> out_shape;
for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(out_shape);
array<DenseIndex, 4> ix;
for (DenseIndex i = 0; i < sizeDim0; ++i) {
for (DenseIndex j = 0; j < sizeDim1; ++j) {
for (DenseIndex k = 0; k < sizeDim2; ++k) {
for (DenseIndex l = 0; l < sizeDim3; ++l) {
ix[0] = i;
ix[1] = j;
ix[2] = k;
ix[3] = l;
// suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
tensor(ix) = (ix[dim] != 0) ? 1.0 : -10.0;
}
}
}
}
std::size_t in_bytes = tensor.size() * sizeof(DataType);
std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, DenseIndex> > gpu_in(
d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout, DenseIndex> > gpu_out(d_out, out_shape);
sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
gpu_out.device(sycl_device) = gpu_in.argmin(dim);
sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect min to be in the first index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
}
sycl_device.synchronize();
for (DenseIndex i = 0; i < sizeDim0; ++i) {
for (DenseIndex j = 0; j < sizeDim1; ++j) {
for (DenseIndex k = 0; k < sizeDim2; ++k) {
for (DenseIndex l = 0; l < sizeDim3; ++l) {
ix[0] = i;
ix[1] = j;
ix[2] = k;
ix[3] = l;
// suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
tensor(ix) = (ix[dim] != tensor.dimension(dim) - 1) ? 1.0 : -20.0;
}
}
}
}
sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
gpu_out.device(sycl_device) = gpu_in.argmin(dim);
sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect min to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
sycl_device.deallocate(d_in);
sycl_device.deallocate(d_out);
}
}
template <typename DataType, typename Device_Selector>
void sycl_argmax_test_per_device(const Device_Selector& d) {
QueueInterface queueInterface(d);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_sycl_simple_argmax<DataType, RowMajor, int64_t>(sycl_device);
test_sycl_simple_argmax<DataType, ColMajor, int64_t>(sycl_device);
test_sycl_argmax_dim<DataType, ColMajor, int64_t>(sycl_device);
test_sycl_argmax_dim<DataType, RowMajor, int64_t>(sycl_device);
test_sycl_argmin_dim<DataType, ColMajor, int64_t>(sycl_device);
test_sycl_argmin_dim<DataType, RowMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_argmax_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_argmax_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,370 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_1d()
{
Tensor<int, 1> vec1(6);
Tensor<int, 1, RowMajor> vec2(6);
vec1(0) = 4; vec2(0) = 0;
vec1(1) = 8; vec2(1) = 1;
vec1(2) = 15; vec2(2) = 2;
vec1(3) = 16; vec2(3) = 3;
vec1(4) = 23; vec2(4) = 4;
vec1(5) = 42; vec2(5) = 5;
int col_major[6];
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
TensorMap<Tensor<int, 1> > vec3(col_major, 6);
TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
vec3 = vec1;
vec4 = vec2;
VERIFY_IS_EQUAL(vec3(0), 4);
VERIFY_IS_EQUAL(vec3(1), 8);
VERIFY_IS_EQUAL(vec3(2), 15);
VERIFY_IS_EQUAL(vec3(3), 16);
VERIFY_IS_EQUAL(vec3(4), 23);
VERIFY_IS_EQUAL(vec3(5), 42);
VERIFY_IS_EQUAL(vec4(0), 0);
VERIFY_IS_EQUAL(vec4(1), 1);
VERIFY_IS_EQUAL(vec4(2), 2);
VERIFY_IS_EQUAL(vec4(3), 3);
VERIFY_IS_EQUAL(vec4(4), 4);
VERIFY_IS_EQUAL(vec4(5), 5);
vec1.setZero();
vec2.setZero();
vec1 = vec3;
vec2 = vec4;
VERIFY_IS_EQUAL(vec1(0), 4);
VERIFY_IS_EQUAL(vec1(1), 8);
VERIFY_IS_EQUAL(vec1(2), 15);
VERIFY_IS_EQUAL(vec1(3), 16);
VERIFY_IS_EQUAL(vec1(4), 23);
VERIFY_IS_EQUAL(vec1(5), 42);
VERIFY_IS_EQUAL(vec2(0), 0);
VERIFY_IS_EQUAL(vec2(1), 1);
VERIFY_IS_EQUAL(vec2(2), 2);
VERIFY_IS_EQUAL(vec2(3), 3);
VERIFY_IS_EQUAL(vec2(4), 4);
VERIFY_IS_EQUAL(vec2(5), 5);
}
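// TensorMap adapts a caller-owned buffer without copying, so the assignments
// above write straight into col_major[]/row_major[]. A minimal standalone
// sketch (the buffer name `raw` is illustrative, not part of the test):
//
//   int raw[6] = {0};
//   TensorMap<Tensor<int, 1> > view(raw, 6);
//   view = view.constant(7);  // raw[] now holds six 7s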
static void test_2d()
{
Tensor<int, 2> mat1(2,3);
Tensor<int, 2, RowMajor> mat2(2,3);
mat1(0,0) = 0;
mat1(0,1) = 1;
mat1(0,2) = 2;
mat1(1,0) = 3;
mat1(1,1) = 4;
mat1(1,2) = 5;
mat2(0,0) = 0;
mat2(0,1) = 1;
mat2(0,2) = 2;
mat2(1,0) = 3;
mat2(1,1) = 4;
mat2(1,2) = 5;
int col_major[6];
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
TensorMap<Tensor<int, 2> > mat3(row_major, 2, 3);
TensorMap<Tensor<int, 2, RowMajor> > mat4(col_major, 2, 3);
mat3 = mat1;
mat4 = mat2;
VERIFY_IS_EQUAL(mat3(0,0), 0);
VERIFY_IS_EQUAL(mat3(0,1), 1);
VERIFY_IS_EQUAL(mat3(0,2), 2);
VERIFY_IS_EQUAL(mat3(1,0), 3);
VERIFY_IS_EQUAL(mat3(1,1), 4);
VERIFY_IS_EQUAL(mat3(1,2), 5);
VERIFY_IS_EQUAL(mat4(0,0), 0);
VERIFY_IS_EQUAL(mat4(0,1), 1);
VERIFY_IS_EQUAL(mat4(0,2), 2);
VERIFY_IS_EQUAL(mat4(1,0), 3);
VERIFY_IS_EQUAL(mat4(1,1), 4);
VERIFY_IS_EQUAL(mat4(1,2), 5);
mat1.setZero();
mat2.setZero();
mat1 = mat3;
mat2 = mat4;
VERIFY_IS_EQUAL(mat1(0,0), 0);
VERIFY_IS_EQUAL(mat1(0,1), 1);
VERIFY_IS_EQUAL(mat1(0,2), 2);
VERIFY_IS_EQUAL(mat1(1,0), 3);
VERIFY_IS_EQUAL(mat1(1,1), 4);
VERIFY_IS_EQUAL(mat1(1,2), 5);
VERIFY_IS_EQUAL(mat2(0,0), 0);
VERIFY_IS_EQUAL(mat2(0,1), 1);
VERIFY_IS_EQUAL(mat2(0,2), 2);
VERIFY_IS_EQUAL(mat2(1,0), 3);
VERIFY_IS_EQUAL(mat2(1,1), 4);
VERIFY_IS_EQUAL(mat2(1,2), 5);
}
static void test_3d()
{
Tensor<int, 3> mat1(2,3,7);
Tensor<int, 3, RowMajor> mat2(2,3,7);
int val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
val++;
}
}
}
int col_major[2*3*7];
int row_major[2*3*7];
memset(col_major, 0, 2*3*7*sizeof(int));
memset(row_major, 0, 2*3*7*sizeof(int));
TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);
mat3 = mat1;
mat4 = mat2;
val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(mat3(i,j,k), val);
VERIFY_IS_EQUAL(mat4(i,j,k), val);
val++;
}
}
}
mat1.setZero();
mat2.setZero();
mat1 = mat3;
mat2 = mat4;
val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(mat1(i,j,k), val);
VERIFY_IS_EQUAL(mat2(i,j,k), val);
val++;
}
}
}
}
static void test_same_type()
{
Tensor<int, 1> orig_tensor(5);
Tensor<int, 1> dest_tensor(5);
orig_tensor.setRandom();
dest_tensor.setRandom();
int* orig_data = orig_tensor.data();
int* dest_data = dest_tensor.data();
dest_tensor = orig_tensor;
VERIFY_IS_EQUAL(orig_tensor.data(), orig_data);
VERIFY_IS_EQUAL(dest_tensor.data(), dest_data);
for (int i = 0; i < 5; ++i) {
VERIFY_IS_EQUAL(dest_tensor(i), orig_tensor(i));
}
TensorFixedSize<int, Sizes<5> > orig_array;
TensorFixedSize<int, Sizes<5> > dest_array;
orig_array.setRandom();
dest_array.setRandom();
orig_data = orig_array.data();
dest_data = dest_array.data();
dest_array = orig_array;
VERIFY_IS_EQUAL(orig_array.data(), orig_data);
VERIFY_IS_EQUAL(dest_array.data(), dest_data);
for (int i = 0; i < 5; ++i) {
VERIFY_IS_EQUAL(dest_array(i), orig_array(i));
}
int orig[5] = {1, 2, 3, 4, 5};
int dest[5] = {6, 7, 8, 9, 10};
TensorMap<Tensor<int, 1> > orig_map(orig, 5);
TensorMap<Tensor<int, 1> > dest_map(dest, 5);
orig_data = orig_map.data();
dest_data = dest_map.data();
dest_map = orig_map;
VERIFY_IS_EQUAL(orig_map.data(), orig_data);
VERIFY_IS_EQUAL(dest_map.data(), dest_data);
for (int i = 0; i < 5; ++i) {
VERIFY_IS_EQUAL(dest[i], i+1);
}
}
static void test_auto_resize()
{
Tensor<int, 1> tensor1;
Tensor<int, 1> tensor2(3);
Tensor<int, 1> tensor3(5);
Tensor<int, 1> tensor4(7);
Tensor<int, 1> new_tensor(5);
new_tensor.setRandom();
tensor1 = tensor2 = tensor3 = tensor4 = new_tensor;
VERIFY_IS_EQUAL(tensor1.dimension(0), new_tensor.dimension(0));
VERIFY_IS_EQUAL(tensor2.dimension(0), new_tensor.dimension(0));
VERIFY_IS_EQUAL(tensor3.dimension(0), new_tensor.dimension(0));
VERIFY_IS_EQUAL(tensor4.dimension(0), new_tensor.dimension(0));
for (int i = 0; i < new_tensor.dimension(0); ++i) {
VERIFY_IS_EQUAL(tensor1(i), new_tensor(i));
VERIFY_IS_EQUAL(tensor2(i), new_tensor(i));
VERIFY_IS_EQUAL(tensor3(i), new_tensor(i));
VERIFY_IS_EQUAL(tensor4(i), new_tensor(i));
}
}
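// Tensor::operator= resizes the destination to match the right-hand side,
// which is why tensor1 (never allocated) and tensor4 (size 7) both end up
// with dimension 5 after the chained assignment above.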
static void test_compound_assign()
{
Tensor<int, 1> start_tensor(10);
Tensor<int, 1> offset_tensor(10);
start_tensor.setRandom();
offset_tensor.setRandom();
Tensor<int, 1> tensor = start_tensor;
tensor += offset_tensor;
for (int i = 0; i < 10; ++i) {
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) + offset_tensor(i));
}
tensor = start_tensor;
tensor -= offset_tensor;
for (int i = 0; i < 10; ++i) {
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) - offset_tensor(i));
}
tensor = start_tensor;
tensor *= offset_tensor;
for (int i = 0; i < 10; ++i) {
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) * offset_tensor(i));
}
tensor = start_tensor;
tensor /= offset_tensor;
for (int i = 0; i < 10; ++i) {
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) / offset_tensor(i));
}
}
static void test_std_initializers_tensor() {
#if EIGEN_HAS_VARIADIC_TEMPLATES
Tensor<int, 1> a(3);
a.setValues({0, 1, 2});
VERIFY_IS_EQUAL(a(0), 0);
VERIFY_IS_EQUAL(a(1), 1);
VERIFY_IS_EQUAL(a(2), 2);
// It fills the top-left slice.
a.setValues({10, 20});
VERIFY_IS_EQUAL(a(0), 10);
VERIFY_IS_EQUAL(a(1), 20);
VERIFY_IS_EQUAL(a(2), 2);
// Chaining.
Tensor<int, 1> a2(3);
a2 = a.setValues({100, 200, 300});
VERIFY_IS_EQUAL(a(0), 100);
VERIFY_IS_EQUAL(a(1), 200);
VERIFY_IS_EQUAL(a(2), 300);
VERIFY_IS_EQUAL(a2(0), 100);
VERIFY_IS_EQUAL(a2(1), 200);
VERIFY_IS_EQUAL(a2(2), 300);
Tensor<int, 2> b(2, 3);
b.setValues({{0, 1, 2}, {3, 4, 5}});
VERIFY_IS_EQUAL(b(0, 0), 0);
VERIFY_IS_EQUAL(b(0, 1), 1);
VERIFY_IS_EQUAL(b(0, 2), 2);
VERIFY_IS_EQUAL(b(1, 0), 3);
VERIFY_IS_EQUAL(b(1, 1), 4);
VERIFY_IS_EQUAL(b(1, 2), 5);
// It fills the top-left slice.
b.setValues({{10, 20}, {30}});
VERIFY_IS_EQUAL(b(0, 0), 10);
VERIFY_IS_EQUAL(b(0, 1), 20);
VERIFY_IS_EQUAL(b(0, 2), 2);
VERIFY_IS_EQUAL(b(1, 0), 30);
VERIFY_IS_EQUAL(b(1, 1), 4);
VERIFY_IS_EQUAL(b(1, 2), 5);
Eigen::Tensor<int, 3> c(3, 2, 4);
c.setValues({{{0, 1, 2, 3}, {4, 5, 6, 7}},
{{10, 11, 12, 13}, {14, 15, 16, 17}},
{{20, 21, 22, 23}, {24, 25, 26, 27}}});
VERIFY_IS_EQUAL(c(0, 0, 0), 0);
VERIFY_IS_EQUAL(c(0, 0, 1), 1);
VERIFY_IS_EQUAL(c(0, 0, 2), 2);
VERIFY_IS_EQUAL(c(0, 0, 3), 3);
VERIFY_IS_EQUAL(c(0, 1, 0), 4);
VERIFY_IS_EQUAL(c(0, 1, 1), 5);
VERIFY_IS_EQUAL(c(0, 1, 2), 6);
VERIFY_IS_EQUAL(c(0, 1, 3), 7);
VERIFY_IS_EQUAL(c(1, 0, 0), 10);
VERIFY_IS_EQUAL(c(1, 0, 1), 11);
VERIFY_IS_EQUAL(c(1, 0, 2), 12);
VERIFY_IS_EQUAL(c(1, 0, 3), 13);
VERIFY_IS_EQUAL(c(1, 1, 0), 14);
VERIFY_IS_EQUAL(c(1, 1, 1), 15);
VERIFY_IS_EQUAL(c(1, 1, 2), 16);
VERIFY_IS_EQUAL(c(1, 1, 3), 17);
VERIFY_IS_EQUAL(c(2, 0, 0), 20);
VERIFY_IS_EQUAL(c(2, 0, 1), 21);
VERIFY_IS_EQUAL(c(2, 0, 2), 22);
VERIFY_IS_EQUAL(c(2, 0, 3), 23);
VERIFY_IS_EQUAL(c(2, 1, 0), 24);
VERIFY_IS_EQUAL(c(2, 1, 1), 25);
VERIFY_IS_EQUAL(c(2, 1, 2), 26);
VERIFY_IS_EQUAL(c(2, 1, 3), 27);
#endif // EIGEN_HAS_VARIADIC_TEMPLATES
}
EIGEN_DECLARE_TEST(cxx11_tensor_assign)
{
CALL_SUBTEST(test_1d());
CALL_SUBTEST(test_2d());
CALL_SUBTEST(test_3d());
CALL_SUBTEST(test_same_type());
CALL_SUBTEST(test_auto_resize());
CALL_SUBTEST(test_compound_assign());
CALL_SUBTEST(test_std_initializers_tensor());
}

View File

@@ -0,0 +1,576 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Andy Davis <andydavis@google.com>
// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <algorithm>
#include <set>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::Index;
using Eigen::RowMajor;
using Eigen::ColMajor;
using Eigen::internal::TensorBlockShapeType;
static TensorOpCost zeroCost() { return {0, 0, 0}; }
template<typename T>
static const T& choose(int layout, const T& col, const T& row) {
return layout == ColMajor ? col : row;
}
static TensorBlockShapeType RandomShape() {
return internal::random<bool>()
? TensorBlockShapeType::kUniformAllDims
: TensorBlockShapeType::kSkewedInnerDims;
}
template <int NumDims>
static size_t RandomTargetSize(const DSizes<Index, NumDims>& dims) {
return internal::random<size_t>(1, dims.TotalSize());
}
template <int NumDims>
static DSizes<Index, NumDims> RandomDims() {
array<Index, NumDims> dims;
for (int i = 0; i < NumDims; ++i) {
dims[i] = internal::random<int>(1, 20);
}
return DSizes<Index, NumDims>(dims);
}
template <typename T>
static T* GenerateRandomData(const Index& size) {
T* data = new T[size];
for (int i = 0; i < size; ++i) {
data[i] = internal::random<T>();
}
return data;
}
template <int NumDims>
static void Debug(DSizes<Index, NumDims> dims) {
for (int i = 0; i < NumDims; ++i) {
std::cout << dims[i] << "; ";
}
std::cout << std::endl;
}
template <int Layout>
static void test_block_mapper_sanity()
{
typedef internal::TensorBlockMapper<2, Layout> TensorBlockMapper;
DSizes<Index, 2> tensor_dims(100, 100);
// Test uniform blocks.
TensorBlockMapper uniform_block_mapper(
tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100, zeroCost()});
VERIFY_IS_EQUAL(uniform_block_mapper.blockCount(), 100);
VERIFY_IS_EQUAL(uniform_block_mapper.blockTotalSize(), 100);
// 10x10 blocks
auto uniform_b0 = uniform_block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(uniform_b0.dimensions().at(0), 10);
VERIFY_IS_EQUAL(uniform_b0.dimensions().at(1), 10);
// Test skewed to inner dims blocks.
TensorBlockMapper skewed_block_mapper(
tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100, zeroCost()});
VERIFY_IS_EQUAL(skewed_block_mapper.blockCount(), 100);
VERIFY_IS_EQUAL(skewed_block_mapper.blockTotalSize(), 100);
// 1x100 (100x1) rows/cols depending on a tensor layout.
auto skewed_b0 = skewed_block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(skewed_b0.dimensions().at(0), choose(Layout, 100, 1));
VERIFY_IS_EQUAL(skewed_b0.dimensions().at(1), choose(Layout, 1, 100));
}
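// The counts above follow from simple arithmetic: a 100x100 tensor split
// into blocks of at most 100 coefficients yields
//   100 * 100 / 100 = 100 blocks.
// The uniform shape spreads the budget evenly (10x10 per block), while the
// skewed shape hands the whole budget to the innermost dimension (100x1 or
// 1x100 depending on layout).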
// Given a TensorBlock, visit every element accessible through it and record
// each index in the visited set. Verify that every coeff is accessed only once.
template<int NumDims, int Layout>
static void UpdateCoeffSet(
const DSizes<Index, NumDims>& tensor_strides,
const internal::TensorBlockDescriptor<NumDims>& block,
Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) {
const DSizes<Index, NumDims>& block_sizes = block.dimensions();
for (int i = 0; i < block_sizes[dim_index]; ++i) {
if (tensor_strides[dim_index] == 1) {
typedef std::pair<std::set<Index>::iterator, bool> ReturnType;
ReturnType inserted = visited_coeffs->insert(first_coeff_index + i);
VERIFY_IS_EQUAL(inserted.second, true);
} else {
int next_dim_index = dim_index + choose(Layout, -1, 1);
UpdateCoeffSet<NumDims, Layout>(tensor_strides, block, first_coeff_index,
next_dim_index, visited_coeffs);
first_coeff_index += tensor_strides[dim_index];
}
}
}
template <typename T, int NumDims, int Layout>
static void test_block_mapper_maps_every_element() {
typedef internal::TensorBlockMapper<NumDims, Layout> TensorBlockMapper;
DSizes<Index, NumDims> dims = RandomDims<NumDims>();
DSizes<Index, NumDims> strides = internal::strides<Layout>(dims);
// Keep track of elements indices available via block access.
std::set<Index> coeff_set;
// Try different combinations of block types and sizes.
TensorBlockMapper block_mapper(
dims, {RandomShape(), RandomTargetSize(dims), zeroCost()});
for (int i = 0; i < block_mapper.blockCount(); ++i) {
auto block = block_mapper.blockDescriptor(i);
UpdateCoeffSet<NumDims, Layout>(strides, block, block.offset(),
choose(Layout, NumDims - 1, 0),
&coeff_set);
}
// Verify that every coefficient in the original Tensor is accessible through
// TensorBlock only once.
Index total_coeffs = dims.TotalSize();
VERIFY_IS_EQUAL(Index(coeff_set.size()), total_coeffs);
VERIFY_IS_EQUAL(*coeff_set.begin(), 0);
VERIFY_IS_EQUAL(*coeff_set.rbegin(), total_coeffs - 1);
}
template <int Layout, int NumDims>
static Index GetInputIndex(Index output_index,
const array<Index, NumDims>& output_to_input_dim_map,
const array<Index, NumDims>& input_strides,
const array<Index, NumDims>& output_strides) {
int input_index = 0;
if (Layout == ColMajor) {
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = output_index / output_strides[i];
input_index += idx * input_strides[output_to_input_dim_map[i]];
output_index -= idx * output_strides[i];
}
return input_index +
output_index * input_strides[output_to_input_dim_map[0]];
} else {
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = output_index / output_strides[i];
input_index += idx * input_strides[output_to_input_dim_map[i]];
output_index -= idx * output_strides[i];
}
return input_index +
output_index * input_strides[output_to_input_dim_map[NumDims - 1]];
}
}
template <int Layout, int NumDims>
static array<Index, NumDims> ComputeStrides(
const array<Index, NumDims>& sizes) {
array<Index, NumDims> strides;
if (Layout == ColMajor) {
strides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
strides[i] = strides[i - 1] * sizes[i - 1];
}
} else {
strides[NumDims - 1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
strides[i] = strides[i + 1] * sizes[i + 1];
}
}
return strides;
}
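// Worked example: for sizes {2, 3, 5},
//   ColMajor strides = {1, 2, 6}   (innermost dimension contiguous),
//   RowMajor strides = {15, 5, 1}  (outermost dimension contiguous),
// so coefficient (i, j, k) lives at linear index i + 2*j + 6*k in ColMajor
// storage.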
template<typename Scalar, typename StorageIndex, int Dim>
class EqualityChecker
{
const Scalar* input_data;
const DSizes<StorageIndex, Dim> &input_dims, &input_strides, &output_dims, &output_strides;
void check_recursive(const Scalar* input, const Scalar* output, int depth=0) const
{
if(depth==Dim)
{
VERIFY_IS_EQUAL(*input, *output);
return;
}
for(int i=0; i<output_dims[depth]; ++i)
{
check_recursive(input + i % input_dims[depth] * input_strides[depth], output + i*output_strides[depth], depth+1);
}
}
public:
EqualityChecker(const Scalar* input_data_,
const DSizes<StorageIndex, Dim> &input_dims_, const DSizes<StorageIndex, Dim> &input_strides_,
const DSizes<StorageIndex, Dim> &output_dims_, const DSizes<StorageIndex, Dim> &output_strides_)
: input_data(input_data_)
, input_dims(input_dims_), input_strides(input_strides_)
, output_dims(output_dims_), output_strides(output_strides_)
{}
void operator()(const Scalar* output_data) const
{
check_recursive(input_data, output_data);
}
};
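// The `i % input_dims[depth]` in check_recursive wraps the input index
// around its extent, so the same checker also validates broadcast outputs:
// when output_dims[depth] is a multiple of input_dims[depth], the input is
// compared against every logical repetition of itself.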
template <int Layout>
static void test_uniform_block_shape()
{
typedef internal::TensorBlockDescriptor<5> TensorBlock;
typedef internal::TensorBlockMapper<5, Layout> TensorBlockMapper;
{
// Test shape 'UniformAllDims' with uniform 'max_coeff count'.
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
for (int i = 0; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'UniformAllDims' with larger 'max_coeff count' which spills
// partially into first inner-most dimension.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(6, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(5, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'UniformAllDims' with larger 'max_coeff count' which spills
// fully into first inner-most dimension.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(5, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'UniformAllDims' with larger 'max_coeff count' which spills
// fully into first few inner-most dimensions.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
VERIFY_IS_EQUAL(6, block.dimensions()[2]);
VERIFY_IS_EQUAL(7, block.dimensions()[3]);
VERIFY_IS_EQUAL(5, block.dimensions()[4]);
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(6, block.dimensions()[3]);
VERIFY_IS_EQUAL(5, block.dimensions()[2]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
VERIFY_IS_EQUAL(5, block.dimensions()[0]);
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'UniformAllDims' with full allocation to all dims.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
VERIFY_IS_EQUAL(6, block.dimensions()[2]);
VERIFY_IS_EQUAL(17, block.dimensions()[3]);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(9, block.dimensions()[3]);
VERIFY_IS_EQUAL(6, block.dimensions()[2]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
}
template <int Layout>
static void test_skewed_inner_dim_block_shape()
{
typedef internal::TensorBlockDescriptor<5> TensorBlock;
typedef internal::TensorBlockMapper<5, Layout> TensorBlockMapper;
// Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(10, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(6, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'SkewedInnerDims' with full allocation to inner-most dim.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'SkewedInnerDims' with full allocation to inner-most dim,
// and partial allocation to second inner-dim.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
VERIFY_IS_EQUAL(3, block.dimensions()[1]);
for (int i = 2; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(15, block.dimensions()[3]);
for (int i = 2; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'SkewedInnerDims' with full allocation to inner-most dim,
// and partial allocation to third inner-dim.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
VERIFY_IS_EQUAL(5, block.dimensions()[2]);
for (int i = 3; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(17, block.dimensions()[3]);
VERIFY_IS_EQUAL(5, block.dimensions()[2]);
for (int i = 1; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.dimensions()[i]);
}
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
// Test shape 'SkewedInnerDims' with full allocation to all dims.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
VERIFY_IS_EQUAL(6, block.dimensions()[2]);
VERIFY_IS_EQUAL(17, block.dimensions()[3]);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
TensorBlockMapper block_mapper(
dims,
{TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(17, block.dimensions()[3]);
VERIFY_IS_EQUAL(6, block.dimensions()[2]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
}
}
template <int Layout>
static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
{
// Test blocking of tensors with zero dimensions:
// - we must not crash on asserts and divisions by zero
// - we must not return block with zero dimensions
// (recipe for overflows/underflows, divisions by zero and NaNs later)
// - total block count must be zero
{
typedef internal::TensorBlockMapper<1, Layout> TensorBlockMapper;
DSizes<Index, 1> dims(0);
for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
TensorBlockMapper block_mapper(
dims, {block_shape, max_coeff_count, zeroCost()});
VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
VERIFY(block_mapper.blockTotalSize() >= 1);
}
}
{
typedef internal::TensorBlockMapper<2, Layout> TensorBlockMapper;
for (int dim1 = 0; dim1 < 3; ++dim1) {
for (int dim2 = 0; dim2 < 3; ++dim2) {
DSizes<Index, 2> dims(dim1, dim2);
for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
TensorBlockMapper block_mapper(
dims, {block_shape, max_coeff_count, zeroCost()});
if (dim1 * dim2 == 0) {
VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
}
VERIFY(block_mapper.blockTotalSize() >= 1);
}
}
}
}
}
#define TEST_LAYOUTS(NAME) \
CALL_SUBTEST(NAME<ColMajor>()); \
CALL_SUBTEST(NAME<RowMajor>())
#define TEST_LAYOUTS_AND_DIMS(TYPE, NAME) \
CALL_SUBTEST((NAME<TYPE, 1, ColMajor>())); \
CALL_SUBTEST((NAME<TYPE, 1, RowMajor>())); \
CALL_SUBTEST((NAME<TYPE, 2, ColMajor>())); \
CALL_SUBTEST((NAME<TYPE, 2, RowMajor>())); \
CALL_SUBTEST((NAME<TYPE, 3, ColMajor>())); \
CALL_SUBTEST((NAME<TYPE, 3, RowMajor>())); \
CALL_SUBTEST((NAME<TYPE, 4, ColMajor>())); \
CALL_SUBTEST((NAME<TYPE, 4, RowMajor>())); \
CALL_SUBTEST((NAME<TYPE, 5, ColMajor>())); \
CALL_SUBTEST((NAME<TYPE, 5, RowMajor>()))
#define TEST_LAYOUTS_WITH_ARG(NAME, ARG) \
CALL_SUBTEST(NAME<ColMajor>(ARG)); \
CALL_SUBTEST(NAME<RowMajor>(ARG))
EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
TEST_LAYOUTS(test_block_mapper_sanity);
TEST_LAYOUTS_AND_DIMS(float, test_block_mapper_maps_every_element);
TEST_LAYOUTS(test_uniform_block_shape);
TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
}
#undef TEST_LAYOUTS
#undef TEST_LAYOUTS_AND_DIMS
#undef TEST_LAYOUTS_WITH_ARG

View File

@@ -0,0 +1,858 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// clang-format off
#include "main.h"
#include <Eigen/CXX11/Tensor>
// clang-format on
using Eigen::internal::TensorBlockDescriptor;
using Eigen::internal::TensorExecutor;
// -------------------------------------------------------------------------- //
// Utility functions to generate random tensors, blocks, and evaluate them.
template <int NumDims>
static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
DSizes<Index, NumDims> dims;
for (int i = 0; i < NumDims; ++i) {
dims[i] = internal::random<Index>(min, max);
}
return DSizes<Index, NumDims>(dims);
}
// Block offsets and extents allow constructing a TensorSlicingOp that
// corresponds to a TensorBlockDescriptor.
template <int NumDims>
struct TensorBlockParams {
DSizes<Index, NumDims> offsets;
DSizes<Index, NumDims> sizes;
TensorBlockDescriptor<NumDims, Index> desc;
};
template <int Layout, int NumDims>
static TensorBlockParams<NumDims> RandomBlock(DSizes<Index, NumDims> dims,
Index min, Index max) {
// Choose random offsets and sizes along all tensor dimensions.
DSizes<Index, NumDims> offsets(RandomDims<NumDims>(min, max));
DSizes<Index, NumDims> sizes(RandomDims<NumDims>(min, max));
// Make sure that offset + size do not overflow dims.
for (int i = 0; i < NumDims; ++i) {
offsets[i] = numext::mini(dims[i] - 1, offsets[i]);
sizes[i] = numext::mini(sizes[i], dims[i] - offsets[i]);
}
Index offset = 0;
DSizes<Index, NumDims> strides = Eigen::internal::strides<Layout>(dims);
for (int i = 0; i < NumDims; ++i) {
offset += strides[i] * offsets[i];
}
return {offsets, sizes, TensorBlockDescriptor<NumDims, Index>(offset, sizes)};
}
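// The block offset computed above is the dot product of the per-dimension
// offsets with the layout strides:
//   offset = sum_i strides[i] * offsets[i];
// e.g. in ColMajor with dims {4, 5} (strides {1, 4}), offsets (2, 3) map to
// linear offset 2*1 + 3*4 = 14.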
// Generate a block with sizes skewed towards the inner dimensions. This type
// of block is required for evaluating broadcast expressions.
template <int Layout, int NumDims>
static TensorBlockParams<NumDims> SkewedInnerBlock(
DSizes<Index, NumDims> dims) {
using BlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
BlockMapper block_mapper(dims,
{internal::TensorBlockShapeType::kSkewedInnerDims,
internal::random<size_t>(1, dims.TotalSize()),
{0, 0, 0}});
Index total_blocks = block_mapper.blockCount();
Index block_index = internal::random<Index>(0, total_blocks - 1);
auto block = block_mapper.blockDescriptor(block_index);
DSizes<Index, NumDims> sizes = block.dimensions();
auto strides = internal::strides<Layout>(dims);
DSizes<Index, NumDims> offsets;
// Compute offsets for the first block coefficient.
Index index = block.offset();
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / strides[i];
index -= idx * strides[i];
offsets[i] = idx;
}
if (NumDims > 0) offsets[0] = index;
} else {
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / strides[i];
index -= idx * strides[i];
offsets[i] = idx;
}
if (NumDims > 0) offsets[NumDims - 1] = index;
}
return {offsets, sizes, block};
}
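// SkewedInnerBlock inverts that dot product: starting from the linear block
// offset it peels off one coordinate per dimension via division/remainder
// against the strides, walking from the largest stride to the smallest
// (outermost to innermost for the given layout).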
template <int NumDims>
static TensorBlockParams<NumDims> FixedSizeBlock(DSizes<Index, NumDims> dims) {
DSizes<Index, NumDims> offsets;
for (int i = 0; i < NumDims; ++i) offsets[i] = 0;
return {offsets, dims, TensorBlockDescriptor<NumDims, Index>(0, dims)};
}
inline Eigen::IndexList<Index, Eigen::type2index<1>> NByOne(Index n) {
Eigen::IndexList<Index, Eigen::type2index<1>> ret;
ret.set(0, n);
return ret;
}
inline Eigen::IndexList<Eigen::type2index<1>, Index> OneByM(Index m) {
Eigen::IndexList<Eigen::type2index<1>, Index> ret;
ret.set(1, m);
return ret;
}
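// NByOne/OneByM build IndexList shapes that mix a runtime extent with a
// compile-time unit extent (type2index<1>), so the n-by-1 and 1-by-m
// broadcast shapes below carry the unit dimension in the type itself.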
// -------------------------------------------------------------------------- //
// Verify that block expression evaluation produces the same result as a
// TensorSliceOp (reading a tensor block is the same as taking a tensor slice).
template <typename T, int NumDims, int Layout, typename Expression,
typename GenBlockParams>
static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
using Device = DefaultDevice;
auto d = Device();
// Scratch memory allocator for block evaluation.
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
TensorBlockScratch scratch(d);
// TensorEvaluator is needed to produce tensor blocks of the expression.
auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
eval.evalSubExprsIfNeeded(nullptr);
// Choose random offsets, sizes, and a TensorBlockDescriptor.
TensorBlockParams<NumDims> block_params = gen_block();
// Evaluate TensorBlock expression into a tensor.
Tensor<T, NumDims, Layout> block(block_params.desc.dimensions());
// Dimensions for the potential destination buffer.
DSizes<Index, NumDims> dst_dims;
if (internal::random<bool>()) {
dst_dims = block_params.desc.dimensions();
} else {
for (int i = 0; i < NumDims; ++i) {
Index extent = internal::random<Index>(0, 5);
dst_dims[i] = block_params.desc.dimension(i) + extent;
}
}
// Maybe use this tensor as the block descriptor's destination buffer.
Tensor<T, NumDims, Layout> dst(dst_dims);
dst.setZero();
if (internal::random<bool>()) {
block_params.desc.template AddDestinationBuffer<Layout>(
dst.data(), internal::strides<Layout>(dst.dimensions()));
}
const bool root_of_expr = internal::random<bool>();
auto tensor_block = eval.block(block_params.desc, scratch, root_of_expr);
if (tensor_block.kind() == internal::TensorBlockKind::kMaterializedInOutput) {
// Copy data from destination buffer.
if (dimensions_match(dst.dimensions(), block.dimensions())) {
block = dst;
} else {
DSizes<Index, NumDims> offsets;
for (int i = 0; i < NumDims; ++i) offsets[i] = 0;
block = dst.slice(offsets, block.dimensions());
}
} else {
// Assign to block from expression.
auto b_expr = tensor_block.expr();
// We explicitly disable vectorization and tiling to run a simple
// coefficient-wise assignment loop, which is easy to reason about and should
// be correct.
using BlockAssign = TensorAssignOp<decltype(block), const decltype(b_expr)>;
using BlockExecutor = TensorExecutor<const BlockAssign, Device, false,
internal::TiledEvaluation::Off>;
BlockExecutor::run(BlockAssign(block, b_expr), d);
}
// Cleanup temporary buffers owned by a tensor block.
tensor_block.cleanup();
// Compute a Tensor slice corresponding to a Tensor block.
Tensor<T, NumDims, Layout> slice(block_params.desc.dimensions());
auto s_expr = expr.slice(block_params.offsets, block_params.sizes);
// Explicitly use coefficient assignment to evaluate slice expression.
using SliceAssign = TensorAssignOp<decltype(slice), const decltype(s_expr)>;
using SliceExecutor = TensorExecutor<const SliceAssign, Device, false,
internal::TiledEvaluation::Off>;
SliceExecutor::run(SliceAssign(slice, s_expr), d);
// Tensor block and tensor slice must be the same.
for (Index i = 0; i < block.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(block.coeff(i), slice.coeff(i));
}
}
// -------------------------------------------------------------------------- //
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_block() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
// Identity tensor expression transformation.
VerifyBlockEvaluator<T, NumDims, Layout>(
input, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_unary_expr_block() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
input.abs(), [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_binary_expr_block() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
lhs.setRandom();
rhs.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
lhs * rhs, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_binary_with_unary_expr_block() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
lhs.setRandom();
rhs.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
(lhs.square() + rhs.square()).sqrt(),
[&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_broadcast() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 10);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
DSizes<Index, NumDims> bcast = RandomDims<NumDims>(1, 5);
DSizes<Index, NumDims> bcasted_dims;
for (int i = 0; i < NumDims; ++i) bcasted_dims[i] = dims[i] * bcast[i];
VerifyBlockEvaluator<T, NumDims, Layout>(
input.broadcast(bcast),
[&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.broadcast(bcast),
[&bcasted_dims]() { return RandomBlock<Layout>(bcasted_dims, 5, 10); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.broadcast(bcast),
[&bcasted_dims]() { return FixedSizeBlock(bcasted_dims); });
// Check that desc.destination() memory is not shared between two broadcast
// materializations.
VerifyBlockEvaluator<T, NumDims, Layout>(
input.broadcast(bcast) * input.abs().broadcast(bcast),
[&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_reshape() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 10);
DSizes<Index, NumDims> shuffled = dims;
std::shuffle(&shuffled[0], &shuffled[NumDims - 1], std::mt19937(g_seed));
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
input.reshape(shuffled),
[&shuffled]() { return RandomBlock<Layout>(shuffled, 1, 10); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.reshape(shuffled),
[&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_cast() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
input.template cast<int>().template cast<T>(),
[&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_select() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> lhs(dims);
Tensor<T, NumDims, Layout> rhs(dims);
Tensor<bool, NumDims, Layout> cond(dims);
lhs.setRandom();
rhs.setRandom();
cond.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(cond.select(lhs, rhs), [&dims]() {
return RandomBlock<Layout>(dims, 1, 20);
});
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_padding() {
const int inner_dim = Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
DSizes<Index, NumDims> pad_before = RandomDims<NumDims>(0, 4);
DSizes<Index, NumDims> pad_after = RandomDims<NumDims>(0, 4);
array<std::pair<Index, Index>, NumDims> paddings;
for (int i = 0; i < NumDims; ++i) {
paddings[i] = std::make_pair(pad_before[i], pad_after[i]);
}
// Test squeezing reads from inner dim.
if (internal::random<bool>()) {
pad_before[inner_dim] = 0;
pad_after[inner_dim] = 0;
paddings[inner_dim] = std::make_pair(0, 0);
}
DSizes<Index, NumDims> padded_dims;
for (int i = 0; i < NumDims; ++i) {
padded_dims[i] = dims[i] + pad_before[i] + pad_after[i];
}
VerifyBlockEvaluator<T, NumDims, Layout>(
input.pad(paddings),
[&padded_dims]() { return FixedSizeBlock(padded_dims); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.pad(paddings),
[&padded_dims]() { return RandomBlock<Layout>(padded_dims, 1, 10); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.pad(paddings),
[&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_chipping() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
Index chip_dim = internal::random<int>(0, NumDims - 1);
Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);
DSizes<Index, NumDims - 1> chipped_dims;
for (Index i = 0; i < chip_dim; ++i) {
chipped_dims[i] = dims[i];
}
for (Index i = chip_dim + 1; i < NumDims; ++i) {
chipped_dims[i - 1] = dims[i];
}
// Block buffer forwarding.
VerifyBlockEvaluator<T, NumDims - 1, Layout>(
input.chip(chip_offset, chip_dim),
[&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
VerifyBlockEvaluator<T, NumDims - 1, Layout>(
input.chip(chip_offset, chip_dim),
[&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
// Block expression assignment.
VerifyBlockEvaluator<T, NumDims - 1, Layout>(
input.abs().chip(chip_offset, chip_dim),
[&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
VerifyBlockEvaluator<T, NumDims - 1, Layout>(
input.abs().chip(chip_offset, chip_dim),
[&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
}
template<typename T, int NumDims>
struct SimpleTensorGenerator {
T operator()(const array<Index, NumDims>& coords) const {
T result = static_cast<T>(0);
for (int i = 0; i < NumDims; ++i) {
result += static_cast<T>((i + 1) * coords[i]);
}
return result;
}
};
// Boolean specialization to avoid -Wint-in-bool-context warnings on GCC.
template<int NumDims>
struct SimpleTensorGenerator<bool, NumDims> {
bool operator()(const array<Index, NumDims>& coords) const {
bool result = false;
for (int i = 0; i < NumDims; ++i) {
result ^= coords[i];
}
return result;
}
};
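// Editorial note (worked example, not part of the original test): the
// generator is invoked once per output coordinate and ignores the input
// values entirely. For a 2x2 tensor, SimpleTensorGenerator<int, 2> yields
// 1*r + 2*c at coordinate (r, c):
//   (0,0) -> 0, (0,1) -> 2, (1,0) -> 1, (1,1) -> 3
// Block evaluation must reproduce these coordinate-derived values for any
// requested block shape, which is what the test below verifies.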
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_generator() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
auto generator = SimpleTensorGenerator<T, NumDims>();
VerifyBlockEvaluator<T, NumDims, Layout>(
input.generate(generator), [&dims]() { return FixedSizeBlock(dims); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.generate(generator),
[&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_reverse() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
// Randomly reverse dimensions.
Eigen::DSizes<bool, NumDims> reverse;
for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
VerifyBlockEvaluator<T, NumDims, Layout>(
input.reverse(reverse), [&dims]() { return FixedSizeBlock(dims); });
VerifyBlockEvaluator<T, NumDims, Layout>(input.reverse(reverse), [&dims]() {
return RandomBlock<Layout>(dims, 1, 10);
});
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_slice() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
// Pick a random slice of an input tensor.
DSizes<Index, NumDims> slice_start = RandomDims<NumDims>(5, 10);
DSizes<Index, NumDims> slice_size = RandomDims<NumDims>(5, 10);
// Make sure that slice start + size do not overflow tensor dims.
for (int i = 0; i < NumDims; ++i) {
slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
}
VerifyBlockEvaluator<T, NumDims, Layout>(
input.slice(slice_start, slice_size),
[&slice_size]() { return FixedSizeBlock(slice_size); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.slice(slice_start, slice_size),
[&slice_size]() { return RandomBlock<Layout>(slice_size, 1, 10); });
}
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_shuffle() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(5, 15);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
DSizes<Index, NumDims> shuffle;
for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
do {
DSizes<Index, NumDims> shuffled_dims;
for (int i = 0; i < NumDims; ++i) shuffled_dims[i] = dims[shuffle[i]];
VerifyBlockEvaluator<T, NumDims, Layout>(
input.shuffle(shuffle),
[&shuffled_dims]() { return FixedSizeBlock(shuffled_dims); });
VerifyBlockEvaluator<T, NumDims, Layout>(
input.shuffle(shuffle), [&shuffled_dims]() {
return RandomBlock<Layout>(shuffled_dims, 1, 5);
});
break;
} while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}
template <typename T, int Layout>
static void test_eval_tensor_reshape_with_bcast() {
Index dim = internal::random<Index>(1, 100);
Tensor<T, 2, Layout> lhs(1, dim);
Tensor<T, 2, Layout> rhs(dim, 1);
lhs.setRandom();
rhs.setRandom();
auto reshapeLhs = NByOne(dim);
auto reshapeRhs = OneByM(dim);
auto bcastLhs = OneByM(dim);
auto bcastRhs = NByOne(dim);
DSizes<Index, 2> dims(dim, dim);
VerifyBlockEvaluator<T, 2, Layout>(
lhs.reshape(reshapeLhs).broadcast(bcastLhs) *
rhs.reshape(reshapeRhs).broadcast(bcastRhs),
[dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
}
template <typename T, int Layout>
static void test_eval_tensor_forced_eval() {
Index dim = internal::random<Index>(1, 100);
Tensor<T, 2, Layout> lhs(dim, 1);
Tensor<T, 2, Layout> rhs(1, dim);
lhs.setRandom();
rhs.setRandom();
auto bcastLhs = OneByM(dim);
auto bcastRhs = NByOne(dim);
DSizes<Index, 2> dims(dim, dim);
VerifyBlockEvaluator<T, 2, Layout>(
(lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
[dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
VerifyBlockEvaluator<T, 2, Layout>(
(lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
[dims]() { return RandomBlock<Layout, 2>(dims, 1, 50); });
}
template <typename T, int Layout>
static void test_eval_tensor_chipping_of_bcast() {
if (Layout != static_cast<int>(RowMajor)) return;
Index dim0 = internal::random<Index>(1, 10);
Index dim1 = internal::random<Index>(1, 10);
Index dim2 = internal::random<Index>(1, 10);
Tensor<T, 3, Layout> input(1, dim1, dim2);
input.setRandom();
Eigen::array<Index, 3> bcast = {{dim0, 1, 1}};
DSizes<Index, 2> chipped_dims(dim0, dim2);
VerifyBlockEvaluator<T, 2, Layout>(
input.broadcast(bcast).chip(0, 1),
[chipped_dims]() { return FixedSizeBlock(chipped_dims); });
VerifyBlockEvaluator<T, 2, Layout>(
input.broadcast(bcast).chip(0, 1),
[chipped_dims]() { return SkewedInnerBlock<Layout, 2>(chipped_dims); });
VerifyBlockEvaluator<T, 2, Layout>(
input.broadcast(bcast).chip(0, 1),
[chipped_dims]() { return RandomBlock<Layout, 2>(chipped_dims, 1, 5); });
}
// -------------------------------------------------------------------------- //
// Verify that assigning block to a Tensor expression produces the same result
// as an assignment to TensorSliceOp (writing a block is identical to
// assigning one tensor to a slice of another tensor).
template <typename T, int NumDims, int Layout, int NumExprDims = NumDims,
typename Expression, typename GenBlockParams>
static void VerifyBlockAssignment(Tensor<T, NumDims, Layout>& tensor,
Expression expr, GenBlockParams gen_block) {
using Device = DefaultDevice;
auto d = Device();
// We use tensor evaluator as a target for block and slice assignments.
auto eval = TensorEvaluator<decltype(expr), Device>(expr, d);
  // Generate a random block, or choose a block that fits in the full expression.
TensorBlockParams<NumExprDims> block_params = gen_block();
// Generate random data of the selected block size.
Tensor<T, NumExprDims, Layout> block(block_params.desc.dimensions());
block.setRandom();
// ************************************************************************ //
// (1) Assignment from a block.
  // Construct a materialized block from a randomly generated block tensor.
internal::TensorMaterializedBlock<T, NumExprDims, Layout> blk(
internal::TensorBlockKind::kView, block.data(), block.dimensions());
// Reset all underlying tensor values to zero.
tensor.setZero();
// Use evaluator to write block into a tensor.
eval.writeBlock(block_params.desc, blk);
// Make a copy of the result after assignment.
Tensor<T, NumDims, Layout> block_assigned = tensor;
// ************************************************************************ //
  // (2) Assignment to a slice.
// Reset all underlying tensor values to zero.
tensor.setZero();
  // Assign the block to a slice of the original expression.
auto s_expr = expr.slice(block_params.offsets, block_params.sizes);
// Explicitly use coefficient assignment to evaluate slice expression.
using SliceAssign = TensorAssignOp<decltype(s_expr), const decltype(block)>;
using SliceExecutor = TensorExecutor<const SliceAssign, Device, false,
internal::TiledEvaluation::Off>;
SliceExecutor::run(SliceAssign(s_expr, block), d);
// Make a copy of the result after assignment.
Tensor<T, NumDims, Layout> slice_assigned = tensor;
for (Index i = 0; i < tensor.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(block_assigned.coeff(i), slice_assigned.coeff(i));
}
}
// -------------------------------------------------------------------------- //
template <typename T, int NumDims, int Layout>
static void test_assign_to_tensor() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> tensor(dims);
TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map, [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map, [&dims]() { return FixedSizeBlock(dims); });
}
template <typename T, int NumDims, int Layout>
static void test_assign_to_tensor_reshape() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> tensor(dims);
TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
DSizes<Index, NumDims> shuffled = dims;
  std::shuffle(&shuffled[0], &shuffled[0] + NumDims, std::mt19937(g_seed));
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.reshape(shuffled),
[&shuffled]() { return RandomBlock<Layout>(shuffled, 1, 10); });
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.reshape(shuffled),
[&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.reshape(shuffled),
[&shuffled]() { return FixedSizeBlock(shuffled); });
}
template <typename T, int NumDims, int Layout>
static void test_assign_to_tensor_chipping() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> tensor(dims);
Index chip_dim = internal::random<int>(0, NumDims - 1);
Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);
DSizes<Index, NumDims - 1> chipped_dims;
for (Index i = 0; i < chip_dim; ++i) {
chipped_dims[i] = dims[i];
}
for (Index i = chip_dim + 1; i < NumDims; ++i) {
chipped_dims[i - 1] = dims[i];
}
TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
tensor, map.chip(chip_offset, chip_dim),
[&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
tensor, map.chip(chip_offset, chip_dim),
[&chipped_dims]() { return SkewedInnerBlock<Layout>(chipped_dims); });
VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
tensor, map.chip(chip_offset, chip_dim),
[&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
}
template <typename T, int NumDims, int Layout>
static void test_assign_to_tensor_slice() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
Tensor<T, NumDims, Layout> tensor(dims);
// Pick a random slice of tensor.
DSizes<Index, NumDims> slice_start = RandomDims<NumDims>(5, 10);
DSizes<Index, NumDims> slice_size = RandomDims<NumDims>(5, 10);
// Make sure that slice start + size do not overflow tensor dims.
for (int i = 0; i < NumDims; ++i) {
slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
}
TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.slice(slice_start, slice_size),
[&slice_size]() { return RandomBlock<Layout>(slice_size, 1, 10); });
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.slice(slice_start, slice_size),
[&slice_size]() { return SkewedInnerBlock<Layout>(slice_size); });
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.slice(slice_start, slice_size),
[&slice_size]() { return FixedSizeBlock(slice_size); });
}
template <typename T, int NumDims, int Layout>
static void test_assign_to_tensor_shuffle() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(5, 15);
Tensor<T, NumDims, Layout> tensor(dims);
DSizes<Index, NumDims> shuffle;
for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
do {
DSizes<Index, NumDims> shuffled_dims;
for (int i = 0; i < NumDims; ++i) shuffled_dims[i] = dims[shuffle[i]];
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.shuffle(shuffle),
[&shuffled_dims]() { return FixedSizeBlock(shuffled_dims); });
VerifyBlockAssignment<T, NumDims, Layout>(
tensor, map.shuffle(shuffle), [&shuffled_dims]() {
return RandomBlock<Layout>(shuffled_dims, 1, 5);
});
} while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}
// -------------------------------------------------------------------------- //
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 3, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 4, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 5, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 1, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 2, ColMajor>())); \
  CALL_SUBTEST_PART(PART)((NAME<float, 3, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 2, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 3, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 4, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 5, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 1, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 2, ColMajor>())); \
  CALL_SUBTEST_PART(PART)((NAME<int, 3, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 5, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 2, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 3, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 4, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 5, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 1, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 2, ColMajor>())); \
  CALL_SUBTEST_PART(PART)((NAME<bool, 3, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 5, ColMajor>()))
#define CALL_SUBTESTS_DIMS_LAYOUTS(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 3, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 4, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 5, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 1, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 2, ColMajor>())); \
  CALL_SUBTEST_PART(PART)((NAME<float, 3, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>()))
#define CALL_SUBTESTS_LAYOUTS_TYPES(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, ColMajor>()))
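// Editorial sketch (not part of the original file): assuming the standard
// CALL_SUBTEST_N machinery from main.h, one invocation below expands as
//
//   CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_forced_eval);
//     => CALL_SUBTEST_6((test_eval_tensor_forced_eval<float, RowMajor>()));
//        CALL_SUBTEST_6((test_eval_tensor_forced_eval<float, ColMajor>()));
//        CALL_SUBTEST_6((test_eval_tensor_forced_eval<bool, RowMajor>()));
//        CALL_SUBTEST_6((test_eval_tensor_forced_eval<bool, ColMajor>()));
//
// i.e. CALL_SUBTEST_PART pastes the part number onto CALL_SUBTEST_ so that
// CMake can split the binary into independently runnable parts.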
EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
// clang-format off
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_block);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_binary_expr_block);
CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_unary_expr_block);
CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_binary_with_unary_expr_block);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_broadcast);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_reshape);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_cast);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_select);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_padding);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_chipping);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_generator);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_reverse);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_slice);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_shuffle);
CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_reshape_with_bcast);
CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_forced_eval);
CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_chipping_of_bcast);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_reshape);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_chipping);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_slice);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_shuffle);
// Force CMake to split this test.
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8
// clang-format on
}

View File

@@ -0,0 +1,445 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// clang-format off
#include "main.h"
#include <Eigen/CXX11/Tensor>
// clang-format on
// -------------------------------------------------------------------------- //
// A set of tests for TensorBlockIO: copying data between tensor blocks.
template <int NumDims>
static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
DSizes<Index, NumDims> dims;
for (int i = 0; i < NumDims; ++i) {
dims[i] = internal::random<Index>(min, max);
}
  return dims;
}
static internal::TensorBlockShapeType RandomBlockShape() {
return internal::random<bool>()
? internal::TensorBlockShapeType::kUniformAllDims
: internal::TensorBlockShapeType::kSkewedInnerDims;
}
template <int NumDims>
static size_t RandomTargetBlockSize(const DSizes<Index, NumDims>& dims) {
return internal::random<size_t>(1, dims.TotalSize());
}
template <int Layout, int NumDims>
static Index GetInputIndex(Index output_index,
const array<Index, NumDims>& output_to_input_dim_map,
const array<Index, NumDims>& input_strides,
const array<Index, NumDims>& output_strides) {
  Index input_index = 0;
if (Layout == ColMajor) {
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = output_index / output_strides[i];
input_index += idx * input_strides[output_to_input_dim_map[i]];
output_index -= idx * output_strides[i];
}
return input_index +
output_index * input_strides[output_to_input_dim_map[0]];
} else {
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = output_index / output_strides[i];
input_index += idx * input_strides[output_to_input_dim_map[i]];
output_index -= idx * output_strides[i];
}
return input_index +
output_index * input_strides[output_to_input_dim_map[NumDims - 1]];
}
}
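// Editorial note (worked example, not part of the original test): for a
// ColMajor case with input dims (3, 4) shuffled into output dims (4, 3),
// output_to_input_dim_map = {1, 0}, input_strides = {1, 3} and
// output_strides = {1, 4}. Output index 5 decomposes as
//   idx = 5 / output_strides[1] = 1, remainder 1
//   input_index = 1 * input_strides[map[1]] + 1 * input_strides[map[0]]
//               = 1 * 1 + 1 * 3 = 4
// i.e. output coordinate (1, 1) maps back to input coordinate (1, 1), as
// expected for a pure dimension transposition.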
template <typename T, int NumDims, int Layout>
static void test_block_io_copy_data_from_source_to_target() {
using TensorBlockIO = internal::TensorBlockIO<T, Index, NumDims, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;
// Generate a random input Tensor.
DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 30);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
// Write data to an output Tensor.
Tensor<T, NumDims, Layout> output(dims);
// Construct a tensor block mapper.
using TensorBlockMapper =
internal::TensorBlockMapper<NumDims, Layout, Index>;
TensorBlockMapper block_mapper(
dims, {RandomBlockShape(), RandomTargetBlockSize(dims), {0, 0, 0}});
// We will copy data from input to output through this buffer.
Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());
// Precompute strides for TensorBlockIO::Copy.
auto input_strides = internal::strides<Layout>(dims);
auto output_strides = internal::strides<Layout>(dims);
const T* input_data = input.data();
T* output_data = output.data();
T* block_data = block.data();
for (int i = 0; i < block_mapper.blockCount(); ++i) {
auto desc = block_mapper.blockDescriptor(i);
auto blk_dims = desc.dimensions();
auto blk_strides = internal::strides<Layout>(blk_dims);
{
// Read from input into a block buffer.
IODst dst(blk_dims, blk_strides, block_data, 0);
IOSrc src(input_strides, input_data, desc.offset());
TensorBlockIO::Copy(dst, src);
}
{
// Write from block buffer to output.
IODst dst(blk_dims, output_strides, output_data, desc.offset());
IOSrc src(blk_strides, block_data, 0);
TensorBlockIO::Copy(dst, src);
}
}
for (int i = 0; i < dims.TotalSize(); ++i) {
VERIFY_IS_EQUAL(input_data[i], output_data[i]);
}
}
template <typename T, int NumDims, int Layout>
static void test_block_io_copy_using_reordered_dimensions() {
// Generate a random input Tensor.
DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 30);
Tensor<T, NumDims, Layout> input(dims);
input.setRandom();
// Create a random dimension re-ordering/shuffle.
std::vector<int> shuffle;
for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937(g_seed));
DSizes<Index, NumDims> output_tensor_dims;
DSizes<Index, NumDims> input_to_output_dim_map;
DSizes<Index, NumDims> output_to_input_dim_map;
for (Index i = 0; i < NumDims; ++i) {
output_tensor_dims[shuffle[i]] = dims[i];
input_to_output_dim_map[i] = shuffle[i];
output_to_input_dim_map[shuffle[i]] = i;
}
// Write data to an output Tensor.
Tensor<T, NumDims, Layout> output(output_tensor_dims);
// Construct a tensor block mapper.
// NOTE: Tensor block mapper works with shuffled dimensions.
using TensorBlockMapper =
internal::TensorBlockMapper<NumDims, Layout, Index>;
TensorBlockMapper block_mapper(output_tensor_dims,
{RandomBlockShape(),
RandomTargetBlockSize(output_tensor_dims),
{0, 0, 0}});
// We will copy data from input to output through this buffer.
Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());
// Precompute strides for TensorBlockIO::Copy.
auto input_strides = internal::strides<Layout>(dims);
auto output_strides = internal::strides<Layout>(output_tensor_dims);
const T* input_data = input.data();
T* output_data = output.data();
T* block_data = block.data();
for (Index i = 0; i < block_mapper.blockCount(); ++i) {
auto desc = block_mapper.blockDescriptor(i);
const Index first_coeff_index = GetInputIndex<Layout, NumDims>(
desc.offset(), output_to_input_dim_map, input_strides,
output_strides);
// NOTE: Block dimensions are in the same order as output dimensions.
using TensorBlockIO = internal::TensorBlockIO<T, Index, NumDims, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;
auto blk_dims = desc.dimensions();
auto blk_strides = internal::strides<Layout>(blk_dims);
{
// Read from input into a block buffer.
IODst dst(blk_dims, blk_strides, block_data, 0);
IOSrc src(input_strides, input_data, first_coeff_index);
// TODO(ezhulenev): Remove when fully switched to TensorBlock.
DSizes<int, NumDims> dim_map;
for (int j = 0; j < NumDims; ++j)
dim_map[j] = static_cast<int>(output_to_input_dim_map[j]);
TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/dim_map);
}
{
// We need to convert block dimensions from output to input order.
auto dst_dims = blk_dims;
for (int out_dim = 0; out_dim < NumDims; ++out_dim) {
dst_dims[output_to_input_dim_map[out_dim]] = blk_dims[out_dim];
}
// Write from block buffer to output.
IODst dst(dst_dims, input_strides, output_data, first_coeff_index);
IOSrc src(blk_strides, block_data, 0);
// TODO(ezhulenev): Remove when fully switched to TensorBlock.
DSizes<int, NumDims> dim_map;
for (int j = 0; j < NumDims; ++j)
dim_map[j] = static_cast<int>(input_to_output_dim_map[j]);
TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/dim_map);
}
}
for (Index i = 0; i < dims.TotalSize(); ++i) {
VERIFY_IS_EQUAL(input_data[i], output_data[i]);
}
}
// This is the special case for reading data with reordering, when dimensions
// before/after reordering are the same. Squeezing reads along inner dimensions
// in this case is illegal, because we reorder the innermost dimension.
template <int Layout>
static void test_block_io_copy_using_reordered_dimensions_do_not_squeeze() {
DSizes<Index, 3> tensor_dims(7, 9, 7);
DSizes<Index, 3> block_dims = tensor_dims;
DSizes<int, 3> block_to_tensor_dim;
block_to_tensor_dim[0] = 2;
block_to_tensor_dim[1] = 1;
block_to_tensor_dim[2] = 0;
auto tensor_strides = internal::strides<Layout>(tensor_dims);
auto block_strides = internal::strides<Layout>(block_dims);
Tensor<float, 3, Layout> block(block_dims);
Tensor<float, 3, Layout> tensor(tensor_dims);
tensor.setRandom();
float* tensor_data = tensor.data();
float* block_data = block.data();
using TensorBlockIO = internal::TensorBlockIO<float, Index, 3, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;
// Read from a tensor into a block.
IODst dst(block_dims, block_strides, block_data, 0);
IOSrc src(tensor_strides, tensor_data, 0);
TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/block_to_tensor_dim);
TensorMap<Tensor<float, 3, Layout> > block_tensor(block_data, block_dims);
TensorMap<Tensor<float, 3, Layout> > tensor_tensor(tensor_data, tensor_dims);
for (Index d0 = 0; d0 < tensor_dims[0]; ++d0) {
for (Index d1 = 0; d1 < tensor_dims[1]; ++d1) {
for (Index d2 = 0; d2 < tensor_dims[2]; ++d2) {
float block_value = block_tensor(d2, d1, d0);
float tensor_value = tensor_tensor(d0, d1, d2);
VERIFY_IS_EQUAL(block_value, tensor_value);
}
}
}
}
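// Editorial note (not part of the original test): the {2, 1, 0} map above
// reverses the dimension order, so the block's innermost dimension always
// lands on the tensor's outermost one, in both layouts. Coefficients that
// are contiguous in the block are thus separated by the tensor's largest
// stride, and collapsing ("squeezing") the inner dimensions into one
// contiguous copy would read the wrong elements; TensorBlockIO must keep
// the dimensions separate and iterate them individually.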
// This is the special case for reading data with reordering, when dimensions
// before/after reordering are the same. Squeezing reads in this case is allowed
// because we reorder only the outer dimensions.
template <int Layout>
static void test_block_io_copy_using_reordered_dimensions_squeeze() {
DSizes<Index, 4> tensor_dims(7, 5, 9, 9);
DSizes<Index, 4> block_dims = tensor_dims;
DSizes<int, 4> block_to_tensor_dim;
block_to_tensor_dim[0] = 0;
block_to_tensor_dim[1] = 1;
block_to_tensor_dim[2] = 3;
block_to_tensor_dim[3] = 2;
auto tensor_strides = internal::strides<Layout>(tensor_dims);
auto block_strides = internal::strides<Layout>(block_dims);
Tensor<float, 4, Layout> block(block_dims);
Tensor<float, 4, Layout> tensor(tensor_dims);
tensor.setRandom();
float* tensor_data = tensor.data();
float* block_data = block.data();
using TensorBlockIO = internal::TensorBlockIO<float, Index, 4, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;
// Read from a tensor into a block.
IODst dst(block_dims, block_strides, block_data, 0);
IOSrc src(tensor_strides, tensor_data, 0);
TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/block_to_tensor_dim);
TensorMap<Tensor<float, 4, Layout> > block_tensor(block_data, block_dims);
TensorMap<Tensor<float, 4, Layout> > tensor_tensor(tensor_data, tensor_dims);
for (Index d0 = 0; d0 < tensor_dims[0]; ++d0) {
for (Index d1 = 0; d1 < tensor_dims[1]; ++d1) {
for (Index d2 = 0; d2 < tensor_dims[2]; ++d2) {
for (Index d3 = 0; d3 < tensor_dims[3]; ++d3) {
float block_value = block_tensor(d0, d1, d3, d2);
float tensor_value = tensor_tensor(d0, d1, d2, d3);
VERIFY_IS_EQUAL(block_value, tensor_value);
}
}
}
}
}
template <int Layout>
static void test_block_io_zero_stride() {
DSizes<Index, 5> rnd_dims = RandomDims<5>(1, 30);
DSizes<Index, 5> input_tensor_dims = rnd_dims;
input_tensor_dims[0] = 1;
input_tensor_dims[2] = 1;
input_tensor_dims[4] = 1;
Tensor<float, 5, Layout> input(input_tensor_dims);
input.setRandom();
DSizes<Index, 5> output_tensor_dims = rnd_dims;
auto input_tensor_strides = internal::strides<Layout>(input_tensor_dims);
auto output_tensor_strides = internal::strides<Layout>(output_tensor_dims);
auto input_tensor_strides_with_zeros = input_tensor_strides;
input_tensor_strides_with_zeros[0] = 0;
input_tensor_strides_with_zeros[2] = 0;
input_tensor_strides_with_zeros[4] = 0;
Tensor<float, 5, Layout> output(output_tensor_dims);
output.setRandom();
using TensorBlockIO = internal::TensorBlockIO<float, Index, 5, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;
// Write data from input to output with broadcasting in dims [0, 2, 4].
IODst dst(output_tensor_dims, output_tensor_strides, output.data(), 0);
IOSrc src(input_tensor_strides_with_zeros, input.data(), 0);
TensorBlockIO::Copy(dst, src);
for (int i = 0; i < output_tensor_dims[0]; ++i) {
for (int j = 0; j < output_tensor_dims[1]; ++j) {
for (int k = 0; k < output_tensor_dims[2]; ++k) {
for (int l = 0; l < output_tensor_dims[3]; ++l) {
for (int m = 0; m < output_tensor_dims[4]; ++m) {
float input_value = input(0, j, 0, l, 0);
float output_value = output(i, j, k, l, m);
VERIFY_IS_EQUAL(input_value, output_value);
}
}
}
}
}
}
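// Editorial note (worked example, not part of the original test): a zero
// stride pins a dimension to its first element, because the flat input
// index never advances along it. In 2D with input dims (1, 4) and the
// stride of dim 0 zeroed, output coordinate (i, j) reads input index
//   i * 0 + j * 1 = j
// so every row i sees row 0, which is exactly a broadcast along dim 0.
// The test above uses the same trick in dims 0, 2 and 4 at once.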
template <int Layout>
static void test_block_io_squeeze_ones() {
using TensorBlockIO = internal::TensorBlockIO<float, Index, 5, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;
// Total size > 1.
{
DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1);
auto strides = internal::strides<Layout>(block_sizes);
// Create a random input tensor.
Tensor<float, 5> input(block_sizes);
input.setRandom();
Tensor<float, 5> output(block_sizes);
IODst dst(block_sizes, strides, output.data(), 0);
IOSrc src(strides, input.data());
TensorBlockIO::Copy(dst, src);
for (Index i = 0; i < block_sizes.TotalSize(); ++i) {
VERIFY_IS_EQUAL(output.data()[i], input.data()[i]);
}
}
// Total size == 1.
{
DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1);
auto strides = internal::strides<Layout>(block_sizes);
// Create a random input tensor.
Tensor<float, 5> input(block_sizes);
input.setRandom();
Tensor<float, 5> output(block_sizes);
IODst dst(block_sizes, strides, output.data(), 0);
IOSrc src(strides, input.data());
TensorBlockIO::Copy(dst, src);
for (Index i = 0; i < block_sizes.TotalSize(); ++i) {
VERIFY_IS_EQUAL(output.data()[i], input.data()[i]);
}
}
}
#define CALL_SUBTESTS(NAME) \
CALL_SUBTEST((NAME<float, 1, RowMajor>())); \
CALL_SUBTEST((NAME<float, 2, RowMajor>())); \
CALL_SUBTEST((NAME<float, 4, RowMajor>())); \
CALL_SUBTEST((NAME<float, 5, RowMajor>())); \
CALL_SUBTEST((NAME<float, 1, ColMajor>())); \
CALL_SUBTEST((NAME<float, 2, ColMajor>())); \
CALL_SUBTEST((NAME<float, 4, ColMajor>())); \
CALL_SUBTEST((NAME<float, 5, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 1, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 2, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 4, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 5, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 1, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 2, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 4, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 5, ColMajor>()))
EIGEN_DECLARE_TEST(cxx11_tensor_block_io) {
// clang-format off
CALL_SUBTESTS(test_block_io_copy_data_from_source_to_target);
CALL_SUBTESTS(test_block_io_copy_using_reordered_dimensions);
CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_do_not_squeeze<RowMajor>());
CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_do_not_squeeze<ColMajor>());
CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_squeeze<RowMajor>());
CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_squeeze<ColMajor>());
CALL_SUBTEST(test_block_io_zero_stride<RowMajor>());
CALL_SUBTEST(test_block_io_zero_stride<ColMajor>());
CALL_SUBTEST(test_block_io_squeeze_ones<RowMajor>());
CALL_SUBTEST(test_block_io_squeeze_ones<ColMajor>());
// clang-format on
}

View File

@@ -0,0 +1,144 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
template <typename DataType, int DataLayout, typename IndexType>
static void test_broadcast_sycl_fixed(const Eigen::SyclDevice &sycl_device){
// BROADCAST test:
IndexType inDim1=2;
IndexType inDim2=3;
IndexType inDim3=5;
IndexType inDim4=7;
IndexType bDim1=2;
IndexType bDim2=3;
IndexType bDim3=1;
IndexType bDim4=4;
array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
array<IndexType, 4> out_range; // = in_range * broadcasts
for (size_t i = 0; i < out_range.size(); ++i)
out_range[i] = in_range[i] * broadcasts[i];
Tensor<DataType, 4, DataLayout, IndexType> input(in_range);
Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
for (size_t i = 0; i < in_range.size(); ++i)
VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
for (IndexType i = 0; i < input.size(); ++i)
input(i) = static_cast<DataType>(i);
DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
TensorMap<TensorFixedSize<DataType, Sizes<2, 3, 5, 7>, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
for (IndexType i = 0; i < inDim1*bDim1; ++i) {
for (IndexType j = 0; j < inDim2*bDim2; ++j) {
for (IndexType k = 0; k < inDim3*bDim3; ++k) {
for (IndexType l = 0; l < inDim4*bDim4; ++l) {
VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
}
}
}
}
printf("Broadcast Test with fixed size Passed\n");
sycl_device.deallocate(gpu_in_data);
sycl_device.deallocate(gpu_out_data);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){
// BROADCAST test:
IndexType inDim1=2;
IndexType inDim2=3;
IndexType inDim3=5;
IndexType inDim4=7;
IndexType bDim1=2;
IndexType bDim2=3;
IndexType bDim3=1;
IndexType bDim4=4;
array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
array<IndexType, 4> out_range; // = in_range * broadcasts
for (size_t i = 0; i < out_range.size(); ++i)
out_range[i] = in_range[i] * broadcasts[i];
Tensor<DataType, 4, DataLayout, IndexType> input(in_range);
Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
for (size_t i = 0; i < in_range.size(); ++i)
VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
for (IndexType i = 0; i < input.size(); ++i)
input(i) = static_cast<DataType>(i);
DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
for (IndexType i = 0; i < inDim1*bDim1; ++i) {
for (IndexType j = 0; j < inDim2*bDim2; ++j) {
for (IndexType k = 0; k < inDim3*bDim3; ++k) {
for (IndexType l = 0; l < inDim4*bDim4; ++l) {
VERIFY_IS_APPROX(input(i%inDim1,j%inDim2,k%inDim3,l%inDim4), out(i,j,k,l));
}
}
}
}
printf("Broadcast Test Passed\n");
sycl_device.deallocate(gpu_in_data);
sycl_device.deallocate(gpu_out_data);
}
template<typename DataType> void sycl_broadcast_test_per_device(const cl::sycl::device& d){
std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
QueueInterface queueInterface(d);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_broadcast_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,331 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <int DataLayout>
static void test_simple_broadcasting()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 1;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 1;
Tensor<float, 4, DataLayout> no_broadcast;
no_broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(no_broadcast.dimension(0), 2);
VERIFY_IS_EQUAL(no_broadcast.dimension(1), 3);
VERIFY_IS_EQUAL(no_broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(no_broadcast.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), no_broadcast(i,j,k,l));
}
}
}
}
broadcasts[0] = 2;
broadcasts[1] = 3;
broadcasts[2] = 1;
broadcasts[3] = 4;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 4);
VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(broadcast.dimension(3), 28);
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 28; ++l) {
VERIFY_IS_EQUAL(tensor(i%2,j%3,k%5,l%7), broadcast(i,j,k,l));
}
}
}
}
}
template <int DataLayout>
static void test_vectorized_broadcasting()
{
Tensor<float, 3, DataLayout> tensor(8,3,5);
tensor.setRandom();
array<ptrdiff_t, 3> broadcasts;
broadcasts[0] = 2;
broadcasts[1] = 3;
broadcasts[2] = 4;
Tensor<float, 3, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 16);
VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
for (int i = 0; i < 16; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 20; ++k) {
VERIFY_IS_EQUAL(tensor(i%8,j%3,k%5), broadcast(i,j,k));
}
}
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
tensor.resize(11,3,5);
#else
array<Index, 3> new_dims;
new_dims[0] = 11;
new_dims[1] = 3;
new_dims[2] = 5;
tensor.resize(new_dims);
#endif
tensor.setRandom();
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 22);
VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
for (int i = 0; i < 22; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 20; ++k) {
VERIFY_IS_EQUAL(tensor(i%11,j%3,k%5), broadcast(i,j,k));
}
}
}
}
template <int DataLayout>
static void test_static_broadcasting()
{
Tensor<float, 3, DataLayout> tensor(8,3,5);
tensor.setRandom();
#if defined(EIGEN_HAS_INDEX_LIST)
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts;
#else
Eigen::array<int, 3> broadcasts;
broadcasts[0] = 2;
broadcasts[1] = 3;
broadcasts[2] = 4;
#endif
Tensor<float, 3, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 16);
VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
for (int i = 0; i < 16; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 20; ++k) {
VERIFY_IS_EQUAL(tensor(i%8,j%3,k%5), broadcast(i,j,k));
}
}
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
tensor.resize(11,3,5);
#else
array<Index, 3> new_dims;
new_dims[0] = 11;
new_dims[1] = 3;
new_dims[2] = 5;
tensor.resize(new_dims);
#endif
tensor.setRandom();
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 22);
VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
for (int i = 0; i < 22; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 20; ++k) {
VERIFY_IS_EQUAL(tensor(i%11,j%3,k%5), broadcast(i,j,k));
}
}
}
}
template <int DataLayout>
static void test_fixed_size_broadcasting()
{
// Need to add a [] operator to the Size class for this to work
#if 0
Tensor<float, 1, DataLayout> t1(10);
t1.setRandom();
TensorFixedSize<float, Sizes<1>, DataLayout> t2;
t2 = t2.constant(20.0f);
Tensor<float, 1, DataLayout> t3 = t1 + t2.broadcast(Eigen::array<int, 1>{{10}});
for (int i = 0; i < 10; ++i) {
VERIFY_IS_APPROX(t3(i), t1(i) + t2(0));
}
TensorMap<TensorFixedSize<float, Sizes<1>, DataLayout> > t4(t2.data(), {{1}});
Tensor<float, 1, DataLayout> t5 = t1 + t4.broadcast(Eigen::array<int, 1>{{10}});
for (int i = 0; i < 10; ++i) {
VERIFY_IS_APPROX(t5(i), t1(i) + t2(0));
}
#endif
}
template <int DataLayout>
static void test_simple_broadcasting_one_by_n()
{
Tensor<float, 4, DataLayout> tensor(1,13,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 9;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 1;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 9);
VERIFY_IS_EQUAL(broadcast.dimension(1), 13);
VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(broadcast.dimension(3), 7);
for (int i = 0; i < 9; ++i) {
for (int j = 0; j < 13; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i%1,j%13,k%5,l%7), broadcast(i,j,k,l));
}
}
}
}
}
template <int DataLayout>
static void test_simple_broadcasting_n_by_one()
{
Tensor<float, 4, DataLayout> tensor(7,3,5,1);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 1;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 19;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 7);
VERIFY_IS_EQUAL(broadcast.dimension(1), 3);
VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(broadcast.dimension(3), 19);
for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 19; ++l) {
VERIFY_IS_EQUAL(tensor(i%7,j%3,k%5,l%1), broadcast(i,j,k,l));
}
}
}
}
}
template <int DataLayout>
static void test_simple_broadcasting_one_by_n_by_one_1d()
{
Tensor<float, 3, DataLayout> tensor(1,7,1);
tensor.setRandom();
array<ptrdiff_t, 3> broadcasts;
broadcasts[0] = 5;
broadcasts[1] = 1;
broadcasts[2] = 13;
Tensor<float, 3, DataLayout> broadcasted;
broadcasted = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcasted.dimension(0), 5);
VERIFY_IS_EQUAL(broadcasted.dimension(1), 7);
VERIFY_IS_EQUAL(broadcasted.dimension(2), 13);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 7; ++j) {
for (int k = 0; k < 13; ++k) {
VERIFY_IS_EQUAL(tensor(0,j%7,0), broadcasted(i,j,k));
}
}
}
}
template <int DataLayout>
static void test_simple_broadcasting_one_by_n_by_one_2d()
{
Tensor<float, 4, DataLayout> tensor(1,7,13,1);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 5;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 19;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 5);
VERIFY_IS_EQUAL(broadcast.dimension(1), 7);
VERIFY_IS_EQUAL(broadcast.dimension(2), 13);
VERIFY_IS_EQUAL(broadcast.dimension(3), 19);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 7; ++j) {
for (int k = 0; k < 13; ++k) {
for (int l = 0; l < 19; ++l) {
VERIFY_IS_EQUAL(tensor(0,j%7,k%13,0), broadcast(i,j,k,l));
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_broadcasting)
{
CALL_SUBTEST(test_simple_broadcasting<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting<RowMajor>());
CALL_SUBTEST(test_vectorized_broadcasting<ColMajor>());
CALL_SUBTEST(test_vectorized_broadcasting<RowMajor>());
CALL_SUBTEST(test_static_broadcasting<ColMajor>());
CALL_SUBTEST(test_static_broadcasting<RowMajor>());
CALL_SUBTEST(test_fixed_size_broadcasting<ColMajor>());
CALL_SUBTEST(test_fixed_size_broadcasting<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_n_by_one<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_n_by_one<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<RowMajor>());
}

View File

@@ -0,0 +1,354 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
// Functions used to compare the TensorMap implementation on the device with
// the equivalent on the host
namespace cl {
namespace sycl {
template <typename T> T abs(T x) { return cl::sycl::fabs(x); }
template <typename T> T square(T x) { return x * x; }
template <typename T> T cube(T x) { return x * x * x; }
template <typename T> T inverse(T x) { return T(1) / x; }
template <typename T> T cwiseMax(T x, T y) { return cl::sycl::max(x, y); }
template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
}
}
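// Editorial note (an assumption about intent, not original code): square,
// cube and inverse exist as Eigen tensor methods but have no cl::sycl
// builtin, so host-side equivalents are defined above. The op_* functors
// declared below can then uniformly call cl::sycl::FUNC(x) when computing
// the host reference, whether FUNC is a genuine SYCL builtin or one of
// these shims.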
struct EqualAssignement {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
};
struct PlusEqualAssignement {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; }
};
template <typename DataType, int DataLayout,
typename Assignement, typename Operator>
void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
Operator op;
Assignement asgn;
{
/* Assignement(out, Operator(in)) */
Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in = in.random() + DataType(0.01);
out = out.random() + DataType(0.01);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType *gpu_data = static_cast<DataType *>(
sycl_device.allocate(in.size() * sizeof(DataType)));
DataType *gpu_data_out = static_cast<DataType *>(
sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data, in.data(),
(in.size()) * sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
(out.size()) * sizeof(DataType));
auto device_expr = gpu_out.device(sycl_device);
asgn(device_expr, op(gpu));
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
(out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
DataType ver = reference(i);
asgn(ver, op(in(i)));
VERIFY_IS_APPROX(out(i), ver);
}
sycl_device.deallocate(gpu_data);
sycl_device.deallocate(gpu_data_out);
}
{
/* Assignement(out, Operator(out)) */
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
out = out.random() + DataType(0.01);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType *gpu_data_out = static_cast<DataType *>(
sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
(out.size()) * sizeof(DataType));
auto device_expr = gpu_out.device(sycl_device);
asgn(device_expr, op(gpu_out));
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
(out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
DataType ver = reference(i);
asgn(ver, op(reference(i)));
VERIFY_IS_APPROX(out(i), ver);
}
sycl_device.deallocate(gpu_data_out);
}
}
#define DECLARE_UNARY_STRUCT(FUNC) \
struct op_##FUNC { \
template <typename T> \
auto operator()(const T& x) -> decltype(cl::sycl::FUNC(x)) { \
return cl::sycl::FUNC(x); \
} \
template <typename T> \
auto operator()(const TensorMap<T>& x) -> decltype(x.FUNC()) { \
return x.FUNC(); \
} \
};
DECLARE_UNARY_STRUCT(abs)
DECLARE_UNARY_STRUCT(sqrt)
DECLARE_UNARY_STRUCT(rsqrt)
DECLARE_UNARY_STRUCT(square)
DECLARE_UNARY_STRUCT(cube)
DECLARE_UNARY_STRUCT(inverse)
DECLARE_UNARY_STRUCT(tanh)
DECLARE_UNARY_STRUCT(exp)
DECLARE_UNARY_STRUCT(expm1)
DECLARE_UNARY_STRUCT(log)
DECLARE_UNARY_STRUCT(ceil)
DECLARE_UNARY_STRUCT(floor)
DECLARE_UNARY_STRUCT(round)
DECLARE_UNARY_STRUCT(log1p)
DECLARE_UNARY_STRUCT(sign)
DECLARE_UNARY_STRUCT(isnan)
DECLARE_UNARY_STRUCT(isfinite)
DECLARE_UNARY_STRUCT(isinf)
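// Editorial sketch (macro expansion for illustration, not original code):
// DECLARE_UNARY_STRUCT(sqrt) above produces roughly
//
//   struct op_sqrt {
//     template <typename T>
//     auto operator()(const T& x) -> decltype(cl::sycl::sqrt(x)) {
//       return cl::sycl::sqrt(x);  // scalar path: host reference value
//     }
//     template <typename T>
//     auto operator()(const TensorMap<T>& x) -> decltype(x.sqrt()) {
//       return x.sqrt();           // TensorMap path: device expression
//     }
//   };
//
// so a single op_sqrt instance drives both sides of the comparison.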
template <typename DataType, int DataLayout, typename Assignement>
void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
#define RUN_UNARY_TEST(FUNC) \
test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
op_##FUNC>(sycl_device, tensor_range)
RUN_UNARY_TEST(abs);
RUN_UNARY_TEST(sqrt);
RUN_UNARY_TEST(rsqrt);
RUN_UNARY_TEST(square);
RUN_UNARY_TEST(cube);
RUN_UNARY_TEST(inverse);
RUN_UNARY_TEST(tanh);
RUN_UNARY_TEST(exp);
RUN_UNARY_TEST(expm1);
RUN_UNARY_TEST(log);
RUN_UNARY_TEST(ceil);
RUN_UNARY_TEST(floor);
RUN_UNARY_TEST(round);
RUN_UNARY_TEST(log1p);
RUN_UNARY_TEST(sign);
}
template <typename DataType, int DataLayout, typename Operator>
void test_unary_builtins_return_bool(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
/* out = op(in) */
Operator op;
Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
Tensor<bool, 3, DataLayout, int64_t> out(tensor_range);
in = in.random() + DataType(0.01);
DataType *gpu_data = static_cast<DataType *>(
sycl_device.allocate(in.size() * sizeof(DataType)));
bool *gpu_data_out =
static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
TensorMap<Tensor<bool, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data, in.data(),
(in.size()) * sizeof(DataType));
gpu_out.device(sycl_device) = op(gpu);
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
(out.size()) * sizeof(bool));
for (int64_t i = 0; i < out.size(); ++i) {
VERIFY_IS_EQUAL(out(i), op(in(i)));
}
sycl_device.deallocate(gpu_data);
sycl_device.deallocate(gpu_data_out);
}
template <typename DataType, int DataLayout>
void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
test_unary_builtins_for_assignement<DataType, DataLayout,
PlusEqualAssignement>(sycl_device, tensor_range);
test_unary_builtins_for_assignement<DataType, DataLayout,
EqualAssignement>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout,
op_isnan>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout,
op_isfinite>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout,
op_isinf>(sycl_device, tensor_range);
}
template <typename DataType>
static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
int64_t sizeDim1 = 10;
int64_t sizeDim2 = 10;
int64_t sizeDim3 = 10;
array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
test_unary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
test_unary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
}
template <typename DataType, int DataLayout, typename Operator>
void test_binary_builtins_func(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
/* out = op(in_1, in_2) */
Operator op;
Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> in_2(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in_1 = in_1.random() + DataType(0.01);
in_2 = in_2.random() + DataType(0.01);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType *gpu_data_1 = static_cast<DataType *>(
sycl_device.allocate(in_1.size() * sizeof(DataType)));
DataType *gpu_data_2 = static_cast<DataType *>(
sycl_device.allocate(in_2.size() * sizeof(DataType)));
DataType *gpu_data_out = static_cast<DataType *>(
sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_2(gpu_data_2, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
(in_1.size()) * sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),
(in_2.size()) * sizeof(DataType));
gpu_out.device(sycl_device) = op(gpu_1, gpu_2);
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
(out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
VERIFY_IS_APPROX(out(i), op(in_1(i), in_2(i)));
}
sycl_device.deallocate(gpu_data_1);
sycl_device.deallocate(gpu_data_2);
sycl_device.deallocate(gpu_data_out);
}
template <typename DataType, int DataLayout, typename Operator>
void test_binary_builtins_fixed_arg2(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
/* out = op(in_1, 2) */
Operator op;
const DataType arg2(2);
Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in_1 = in_1.random();
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType *gpu_data_1 = static_cast<DataType *>(
sycl_device.allocate(in_1.size() * sizeof(DataType)));
DataType *gpu_data_out = static_cast<DataType *>(
sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
(in_1.size()) * sizeof(DataType));
gpu_out.device(sycl_device) = op(gpu_1, arg2);
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
(out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
VERIFY_IS_APPROX(out(i), op(in_1(i), arg2));
}
sycl_device.deallocate(gpu_data_1);
sycl_device.deallocate(gpu_data_out);
}
#define DECLARE_BINARY_STRUCT(FUNC) \
struct op_##FUNC { \
template <typename T1, typename T2> \
auto operator()(const T1& x, const T2& y) -> decltype(cl::sycl::FUNC(x, y)) { \
return cl::sycl::FUNC(x, y); \
} \
template <typename T1, typename T2> \
auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) -> decltype(x.FUNC(y)) { \
return x.FUNC(y); \
} \
};
DECLARE_BINARY_STRUCT(cwiseMax)
DECLARE_BINARY_STRUCT(cwiseMin)
#define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR) \
struct op_##NAME { \
template <typename T1, typename T2> \
auto operator()(const T1& x, const T2& y) -> decltype(x OPERATOR y) { \
return x OPERATOR y; \
} \
};
DECLARE_BINARY_STRUCT_OP(plus, +)
DECLARE_BINARY_STRUCT_OP(minus, -)
DECLARE_BINARY_STRUCT_OP(times, *)
DECLARE_BINARY_STRUCT_OP(divide, /)
DECLARE_BINARY_STRUCT_OP(modulo, %)
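// For reference, DECLARE_BINARY_STRUCT_OP(plus, +) expands to roughly:
//
//   struct op_plus {
//     template <typename T1, typename T2>
//     auto operator()(const T1& x, const T2& y) -> decltype(x + y) {
//       return x + y;
//     }
//   };
//
// which applies to scalars as well as, through Eigen's overloaded
// operators, to whole tensor expressions on the device.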
template <typename DataType, int DataLayout>
void test_binary_builtins(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
test_binary_builtins_func<DataType, DataLayout,
op_cwiseMax>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout,
op_cwiseMin>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout,
op_plus>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout,
op_minus>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout,
op_times>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout,
op_divide>(sycl_device, tensor_range);
}
template <typename DataType>
static void test_floating_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
int64_t sizeDim1 = 10;
int64_t sizeDim2 = 10;
int64_t sizeDim3 = 10;
array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
test_binary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
test_binary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
}
template <typename DataType>
static void test_integer_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
int64_t sizeDim1 = 10;
int64_t sizeDim2 = 10;
int64_t sizeDim3 = 10;
array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
test_binary_builtins_fixed_arg2<DataType, RowMajor,
op_modulo>(sycl_device, tensor_range);
test_binary_builtins_fixed_arg2<DataType, ColMajor,
op_modulo>(sycl_device, tensor_range);
}
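// Note that operator% is exercised only here, with a fixed non-zero scalar
// as the second operand: % is undefined for floating-point types, and a
// fixed divisor also avoids an accidental modulo-by-zero from random data.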
EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
QueueInterface queueInterface(device);
Eigen::SyclDevice sycl_device(&queueInterface);
CALL_SUBTEST_1(test_builtin_unary_sycl<float>(sycl_device));
CALL_SUBTEST_2(test_floating_builtin_binary_sycl<float>(sycl_device));
CALL_SUBTEST_3(test_integer_builtin_binary_sycl<int>(sycl_device));
}
}

View File

@@ -0,0 +1,79 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
void test_gpu_conversion() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
Tensor<float, 1> floats(num_elem);
floats.setRandom();
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half(
d_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
d_conv, num_elem);
gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float));
gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>();
gpu_conv.device(gpu_device) = gpu_half.cast<float>();
Tensor<float, 1> initial(num_elem);
Tensor<float, 1> final(num_elem);
gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(initial(i), final(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_half);
gpu_device.deallocate(d_conv);
}
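// The float -> half -> float round trip is lossy (half keeps an 11-bit
// significand), which is why the loop above uses VERIFY_IS_APPROX rather
// than exact equality.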
void test_fallback_conversion() {
int num_elem = 101;
Tensor<float, 1> floats(num_elem);
floats.setRandom();
Eigen::Tensor<Eigen::half, 1> halfs = floats.cast<Eigen::half>();
Eigen::Tensor<float, 1> conv = halfs.cast<float>();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(floats(i), conv(i));
}
}
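// With no .device(...) target the casts above go through the default host
// evaluator, so this covers the CPU fallback path for the same conversion
// exercised on the GPU above.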
EIGEN_DECLARE_TEST(cxx11_tensor_cast_float16_gpu)
{
CALL_SUBTEST(test_gpu_conversion());
CALL_SUBTEST(test_fallback_conversion());
}

View File

@@ -0,0 +1,186 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include "random_without_cast_overflow.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::array;
static void test_simple_cast()
{
Tensor<float, 2> ftensor(20,30);
ftensor = ftensor.random() * 100.f;
Tensor<char, 2> chartensor(20,30);
chartensor.setRandom();
Tensor<std::complex<float>, 2> cplextensor(20,30);
cplextensor.setRandom();
chartensor = ftensor.cast<char>();
cplextensor = ftensor.cast<std::complex<float> >();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_EQUAL(chartensor(i,j), static_cast<char>(ftensor(i,j)));
VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float> >(ftensor(i,j)));
}
}
}
static void test_vectorized_cast()
{
Tensor<int, 2> itensor(20,30);
itensor = itensor.random() / 1000;
Tensor<float, 2> ftensor(20,30);
ftensor.setRandom();
Tensor<double, 2> dtensor(20,30);
dtensor.setRandom();
ftensor = itensor.cast<float>();
dtensor = itensor.cast<double>();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j)));
VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j)));
}
}
}
static void test_float_to_int_cast()
{
Tensor<float, 2> ftensor(20,30);
ftensor = ftensor.random() * 1000.0f;
Tensor<double, 2> dtensor(20,30);
dtensor = dtensor.random() * 1000.0;
Tensor<int, 2> i1tensor = ftensor.cast<int>();
Tensor<int, 2> i2tensor = dtensor.cast<int>();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_EQUAL(i1tensor(i,j), static_cast<int>(ftensor(i,j)));
VERIFY_IS_EQUAL(i2tensor(i,j), static_cast<int>(dtensor(i,j)));
}
}
}
static void test_big_to_small_type_cast()
{
Tensor<double, 2> dtensor(20, 30);
dtensor.setRandom();
Tensor<float, 2> ftensor(20, 30);
ftensor = dtensor.cast<float>();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
}
}
}
static void test_small_to_big_type_cast()
{
Tensor<float, 2> ftensor(20, 30);
ftensor.setRandom();
Tensor<double, 2> dtensor(20, 30);
dtensor = ftensor.cast<double>();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
}
}
}
template <typename FromType, typename ToType>
static void test_type_cast() {
Tensor<FromType, 2> ftensor(100, 200);
// Generate random values for a valid cast.
for (int i = 0; i < 100; ++i) {
for (int j = 0; j < 200; ++j) {
ftensor(i, j) = internal::random_without_cast_overflow<FromType,ToType>::value();
}
}
Tensor<ToType, 2> ttensor(100, 200);
ttensor = ftensor.template cast<ToType>();
for (int i = 0; i < 100; ++i) {
for (int j = 0; j < 200; ++j) {
const ToType ref = internal::cast<FromType,ToType>(ftensor(i, j));
VERIFY_IS_APPROX(ttensor(i, j), ref);
}
}
}
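// random_without_cast_overflow (from random_without_cast_overflow.h) is,
// as its name suggests, meant to draw FromType values that stay
// representable in ToType, so the reference internal::cast in the loop
// above never hits an out-of-range conversion.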
template<typename Scalar, typename EnableIf = void>
struct test_cast_runner {
static void run() {
test_type_cast<Scalar, bool>();
test_type_cast<Scalar, int8_t>();
test_type_cast<Scalar, int16_t>();
test_type_cast<Scalar, int32_t>();
test_type_cast<Scalar, int64_t>();
test_type_cast<Scalar, uint8_t>();
test_type_cast<Scalar, uint16_t>();
test_type_cast<Scalar, uint32_t>();
test_type_cast<Scalar, uint64_t>();
test_type_cast<Scalar, half>();
test_type_cast<Scalar, bfloat16>();
test_type_cast<Scalar, float>();
test_type_cast<Scalar, double>();
test_type_cast<Scalar, std::complex<float>>();
test_type_cast<Scalar, std::complex<double>>();
}
};
// Only certain types allow cast from std::complex<>.
template<typename Scalar>
struct test_cast_runner<Scalar, typename internal::enable_if<NumTraits<Scalar>::IsComplex>::type> {
static void run() {
test_type_cast<Scalar, half>();
test_type_cast<Scalar, bfloat16>();
test_type_cast<Scalar, std::complex<float>>();
test_type_cast<Scalar, std::complex<double>>();
}
};
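// Dispatch note: the primary template's unused EnableIf parameter
// (defaulted to void) exists only so that this partial specialization can
// match complex scalars via enable_if; for those, the shorter list above
// is used because casting std::complex<> down to a real type is not
// supported.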
EIGEN_DECLARE_TEST(cxx11_tensor_casts)
{
CALL_SUBTEST(test_simple_cast());
CALL_SUBTEST(test_vectorized_cast());
CALL_SUBTEST(test_float_to_int_cast());
CALL_SUBTEST(test_big_to_small_type_cast());
CALL_SUBTEST(test_small_to_big_type_cast());
CALL_SUBTEST(test_cast_runner<bool>::run());
CALL_SUBTEST(test_cast_runner<int8_t>::run());
CALL_SUBTEST(test_cast_runner<int16_t>::run());
CALL_SUBTEST(test_cast_runner<int32_t>::run());
CALL_SUBTEST(test_cast_runner<int64_t>::run());
CALL_SUBTEST(test_cast_runner<uint8_t>::run());
CALL_SUBTEST(test_cast_runner<uint16_t>::run());
CALL_SUBTEST(test_cast_runner<uint32_t>::run());
CALL_SUBTEST(test_cast_runner<uint64_t>::run());
CALL_SUBTEST(test_cast_runner<half>::run());
CALL_SUBTEST(test_cast_runner<bfloat16>::run());
CALL_SUBTEST(test_cast_runner<float>::run());
CALL_SUBTEST(test_cast_runner<double>::run());
CALL_SUBTEST(test_cast_runner<std::complex<float>>::run());
CALL_SUBTEST(test_cast_runner<std::complex<double>>::run());
}

View File

@@ -0,0 +1,425 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<int DataLayout>
static void test_simple_chip()
{
Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
tensor.setRandom();
Tensor<float, 4, DataLayout> chip1;
chip1 = tensor.template chip<0>(1);
VERIFY_IS_EQUAL(chip1.dimension(0), 3);
VERIFY_IS_EQUAL(chip1.dimension(1), 5);
VERIFY_IS_EQUAL(chip1.dimension(2), 7);
VERIFY_IS_EQUAL(chip1.dimension(3), 11);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1,i,j,k,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip2 = tensor.template chip<1>(1);
VERIFY_IS_EQUAL(chip2.dimension(0), 2);
VERIFY_IS_EQUAL(chip2.dimension(1), 5);
VERIFY_IS_EQUAL(chip2.dimension(2), 7);
VERIFY_IS_EQUAL(chip2.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip3 = tensor.template chip<2>(2);
VERIFY_IS_EQUAL(chip3.dimension(0), 2);
VERIFY_IS_EQUAL(chip3.dimension(1), 3);
VERIFY_IS_EQUAL(chip3.dimension(2), 7);
VERIFY_IS_EQUAL(chip3.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2,k,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip4(tensor.template chip<3>(5));
VERIFY_IS_EQUAL(chip4.dimension(0), 2);
VERIFY_IS_EQUAL(chip4.dimension(1), 3);
VERIFY_IS_EQUAL(chip4.dimension(2), 5);
VERIFY_IS_EQUAL(chip4.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip5(tensor.template chip<4>(7));
VERIFY_IS_EQUAL(chip5.dimension(0), 2);
VERIFY_IS_EQUAL(chip5.dimension(1), 3);
VERIFY_IS_EQUAL(chip5.dimension(2), 5);
VERIFY_IS_EQUAL(chip5.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7));
}
}
}
}
}
template<int DataLayout>
static void test_dynamic_chip()
{
Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
tensor.setRandom();
Tensor<float, 4, DataLayout> chip1;
chip1 = tensor.chip(1, 0);
VERIFY_IS_EQUAL(chip1.dimension(0), 3);
VERIFY_IS_EQUAL(chip1.dimension(1), 5);
VERIFY_IS_EQUAL(chip1.dimension(2), 7);
VERIFY_IS_EQUAL(chip1.dimension(3), 11);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1,i,j,k,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip2 = tensor.chip(1, 1);
VERIFY_IS_EQUAL(chip2.dimension(0), 2);
VERIFY_IS_EQUAL(chip2.dimension(1), 5);
VERIFY_IS_EQUAL(chip2.dimension(2), 7);
VERIFY_IS_EQUAL(chip2.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip3 = tensor.chip(2, 2);
VERIFY_IS_EQUAL(chip3.dimension(0), 2);
VERIFY_IS_EQUAL(chip3.dimension(1), 3);
VERIFY_IS_EQUAL(chip3.dimension(2), 7);
VERIFY_IS_EQUAL(chip3.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2,k,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip4(tensor.chip(5, 3));
VERIFY_IS_EQUAL(chip4.dimension(0), 2);
VERIFY_IS_EQUAL(chip4.dimension(1), 3);
VERIFY_IS_EQUAL(chip4.dimension(2), 5);
VERIFY_IS_EQUAL(chip4.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
}
}
}
}
Tensor<float, 4, DataLayout> chip5(tensor.chip(7, 4));
VERIFY_IS_EQUAL(chip5.dimension(0), 2);
VERIFY_IS_EQUAL(chip5.dimension(1), 3);
VERIFY_IS_EQUAL(chip5.dimension(2), 5);
VERIFY_IS_EQUAL(chip5.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7));
}
}
}
}
}
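// A minimal sketch of the two chipping APIs exercised above, using a
// hypothetical 3-D tensor t:
//
//   Tensor<float, 3> t(4, 5, 6);
//   t.setRandom();
//   Tensor<float, 2> a = t.chip<1>(2);  // dimension fixed at compile time
//   Tensor<float, 2> b = t.chip(2, 1);  // same slice, dimension chosen at run time
//
// Both drop dimension 1 and fix its index to 2, so a(i, j) == t(i, 2, j).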
template<int DataLayout>
static void test_chip_in_expr() {
Tensor<float, 5, DataLayout> input1(2,3,5,7,11);
input1.setRandom();
Tensor<float, 4, DataLayout> input2(3,5,7,11);
input2.setRandom();
Tensor<float, 4, DataLayout> result = input1.template chip<0>(0) + input2;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
float expected = input1(0,i,j,k,l) + input2(i,j,k,l);
VERIFY_IS_EQUAL(result(i,j,k,l), expected);
}
}
}
}
Tensor<float, 3, DataLayout> input3(3,7,11);
input3.setRandom();
Tensor<float, 3, DataLayout> result2 = input1.template chip<0>(0).template chip<1>(2) + input3;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 7; ++j) {
for (int k = 0; k < 11; ++k) {
float expected = input1(0,i,2,j,k) + input3(i,j,k);
VERIFY_IS_EQUAL(result2(i,j,k), expected);
}
}
}
}
template<int DataLayout>
static void test_chip_as_lvalue()
{
Tensor<float, 5, DataLayout> input1(2,3,5,7,11);
input1.setRandom();
Tensor<float, 4, DataLayout> input2(3,5,7,11);
input2.setRandom();
Tensor<float, 5, DataLayout> tensor = input1;
tensor.template chip<0>(1) = input2;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
for (int m = 0; m < 11; ++m) {
if (i != 1) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
}
}
}
}
}
}
Tensor<float, 4, DataLayout> input3(2,5,7,11);
input3.setRandom();
tensor = input1;
tensor.template chip<1>(1) = input3;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
for (int m = 0; m < 11; ++m) {
if (j != 1) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
}
}
}
}
}
}
Tensor<float, 4, DataLayout> input4(2,3,7,11);
input4.setRandom();
tensor = input1;
tensor.template chip<2>(3) = input4;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
for (int m = 0; m < 11; ++m) {
if (k != 3) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
}
}
}
}
}
}
Tensor<float, 4, DataLayout> input5(2,3,5,11);
input5.setRandom();
tensor = input1;
tensor.template chip<3>(4) = input5;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
for (int m = 0; m < 11; ++m) {
if (l != 4) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
}
}
}
}
}
}
Tensor<float, 4, DataLayout> input6(2,3,5,7);
input6.setRandom();
tensor = input1;
tensor.template chip<4>(5) = input6;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
for (int m = 0; m < 11; ++m) {
if (m != 5) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
}
}
}
}
}
}
Tensor<float, 5, DataLayout> input7(2,3,5,7,11);
input7.setRandom();
tensor = input1;
tensor.chip(0, 0) = input7.chip(0, 0);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
for (int m = 0; m < 11; ++m) {
if (i != 0) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
}
}
}
}
}
}
}
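// Chipping also works on the left-hand side: assigning through a chip
// overwrites exactly one slice of the target and leaves every other
// element untouched, which is what the index checks above verify slice by
// slice.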
static void test_chip_raw_data_col_major()
{
Tensor<float, 5, ColMajor> tensor(2,3,5,7,11);
tensor.setRandom();
typedef TensorEvaluator<decltype(tensor.chip<4>(3)), DefaultDevice> Evaluator4;
auto chip = Evaluator4(tensor.chip<4>(3), DefaultDevice());
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
int chip_index = i + 2 * (j + 3 * (k + 5 * l));
VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3));
}
}
}
}
typedef TensorEvaluator<decltype(tensor.chip<0>(0)), DefaultDevice> Evaluator0;
auto chip0 = Evaluator0(tensor.chip<0>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip0.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.chip<1>(0)), DefaultDevice> Evaluator1;
auto chip1 = Evaluator1(tensor.chip<1>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.chip<2>(0)), DefaultDevice> Evaluator2;
auto chip2 = Evaluator2(tensor.chip<2>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.chip<3>(0)), DefaultDevice> Evaluator3;
auto chip3 = Evaluator3(tensor.chip<3>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
}
static void test_chip_raw_data_row_major()
{
Tensor<float, 5, RowMajor> tensor(11,7,5,3,2);
tensor.setRandom();
typedef TensorEvaluator<decltype(tensor.chip<0>(3)), DefaultDevice> Evaluator0;
auto chip = Evaluator0(tensor.chip<0>(3), DefaultDevice());
for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 2; ++l) {
int chip_index = l + 2 * (k + 3 * (j + 5 * i));
VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(3,i,j,k,l));
}
}
}
}
typedef TensorEvaluator<decltype(tensor.chip<1>(0)), DefaultDevice> Evaluator1;
auto chip1 = Evaluator1(tensor.chip<1>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.chip<2>(0)), DefaultDevice> Evaluator2;
auto chip2 = Evaluator2(tensor.chip<2>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.chip<3>(0)), DefaultDevice> Evaluator3;
auto chip3 = Evaluator3(tensor.chip<3>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.chip<4>(0)), DefaultDevice> Evaluator4;
auto chip4 = Evaluator4(tensor.chip<4>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip4.data(), static_cast<float*>(0));
}
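// Layout note: an evaluator exposes a non-null data() pointer only when
// the chipped slice is contiguous in memory. For ColMajor that is a chip
// of the last dimension only, for RowMajor a chip of the first; every
// other chip is strided, so data() returns null and access has to go
// through coefficient evaluation.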
EIGEN_DECLARE_TEST(cxx11_tensor_chipping)
{
CALL_SUBTEST(test_simple_chip<ColMajor>());
CALL_SUBTEST(test_simple_chip<RowMajor>());
CALL_SUBTEST(test_dynamic_chip<ColMajor>());
CALL_SUBTEST(test_dynamic_chip<RowMajor>());
CALL_SUBTEST(test_chip_in_expr<ColMajor>());
CALL_SUBTEST(test_chip_in_expr<RowMajor>());
CALL_SUBTEST(test_chip_as_lvalue<ColMajor>());
CALL_SUBTEST(test_chip_as_lvalue<RowMajor>());
CALL_SUBTEST(test_chip_raw_data_col_major());
CALL_SUBTEST(test_chip_raw_data_row_major());
}

View File

@@ -0,0 +1,623 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <typename DataType, int DataLayout, typename IndexType>
static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
IndexType sizeDim5 = 11;
array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
tensor.setRandom();
const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l);
sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim2; ++i) {
for (IndexType j = 0; j < sizeDim3; ++j) {
for (IndexType k = 0; k < sizeDim4; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
}
}
}
}
array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l);
sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim3; ++j) {
for (IndexType k = 0; k < sizeDim4; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
}
}
}
}
array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l);
sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim4; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
}
}
}
}
array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l);
sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim3; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
}
}
}
}
array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l);
sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim3; ++k) {
for (IndexType l = 0; l < sizeDim4; ++l) {
VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
}
}
}
}
sycl_device.deallocate(gpu_data_tensor);
sycl_device.deallocate(gpu_data_chip1);
sycl_device.deallocate(gpu_data_chip2);
sycl_device.deallocate(gpu_data_chip3);
sycl_device.deallocate(gpu_data_chip4);
sycl_device.deallocate(gpu_data_chip5);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
IndexType sizeDim5 = 11;
array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
tensor.setRandom();
const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l);
sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim2; ++i) {
for (IndexType j = 0; j < sizeDim3; ++j) {
for (IndexType k = 0; k < sizeDim4; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
}
}
}
}
array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l);
sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim3; ++j) {
for (IndexType k = 0; k < sizeDim4; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
}
}
}
}
array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l);
sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim4; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
}
}
}
}
array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l);
sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim3; ++k) {
for (IndexType l = 0; l < sizeDim5; ++l) {
VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
}
}
}
}
array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l);
sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim3; ++k) {
for (IndexType l = 0; l < sizeDim4; ++l) {
VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
}
}
}
}
sycl_device.deallocate(gpu_data_tensor);
sycl_device.deallocate(gpu_data_chip1);
sycl_device.deallocate(gpu_data_chip2);
sycl_device.deallocate(gpu_data_chip3);
sycl_device.deallocate(gpu_data_chip4);
sycl_device.deallocate(gpu_data_chip5);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) {
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
IndexType sizeDim5 = 11;
array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange);
tensor.setRandom();
tensor1.setRandom();
const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
DataType* gpu_data_tensor1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize);
gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1;
sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
for (int i = 0; i < sizeDim2; ++i) {
for (int j = 0; j < sizeDim3; ++j) {
for (int k = 0; k < sizeDim4; ++k) {
for (int l = 0; l < sizeDim5; ++l) {
DataType expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l);
VERIFY_IS_EQUAL(chip1(i,j,k,l), expected);
}
}
}
}
array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}};
Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange);
Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange);
tensor2.setRandom();
const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType);
DataType* gpu_data_tensor2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange);
TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize);
gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2;
sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
for (int i = 0; i < sizeDim2; ++i) {
for (int j = 0; j < sizeDim4; ++j) {
for (int k = 0; k < sizeDim5; ++k) {
DataType expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k);
VERIFY_IS_EQUAL(chip2(i,j,k), expected);
}
}
}
sycl_device.deallocate(gpu_data_tensor);
sycl_device.deallocate(gpu_data_tensor1);
sycl_device.deallocate(gpu_data_chip1);
sycl_device.deallocate(gpu_data_tensor2);
sycl_device.deallocate(gpu_data_chip2);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
IndexType sizeDim5 = 11;
array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange);
Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange);
input1.setRandom();
input2.setRandom();
const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
const size_t input2TensorBuffSize =input2.size()*sizeof(DataType);
DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_input1 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_input2 = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
gpu_tensor.device(sycl_device)=gpu_input1;
sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
for (int j = 0; j < sizeDim2; ++j) {
for (int k = 0; k < sizeDim3; ++k) {
for (int l = 0; l < sizeDim4; ++l) {
for (int m = 0; m < sizeDim5; ++m) {
if (i != 1) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
}
}
}
}
}
}
gpu_tensor.device(sycl_device)=gpu_input1;
array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange);
input3.setRandom();
const size_t input3TensorBuffSize =input3.size()*sizeof(DataType);
DataType* gpu_data_input3 = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
for (int j = 0; j < sizeDim2; ++j) {
for (int k = 0; k < sizeDim3; ++k) {
for (int l = 0; l < sizeDim4; ++l) {
for (int m = 0; m < sizeDim5; ++m) {
if (j != 1) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
}
}
}
}
}
}
gpu_tensor.device(sycl_device)=gpu_input1;
array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange);
input4.setRandom();
const size_t input4TensorBuffSize =input4.size()*sizeof(DataType);
DataType* gpu_data_input4 = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
for (int j = 0; j < sizeDim2; ++j) {
for (int k = 0; k < sizeDim3; ++k) {
for (int l = 0; l < sizeDim4; ++l) {
for (int m = 0; m < sizeDim5; ++m) {
if (k != 3) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
}
}
}
}
}
}
gpu_tensor.device(sycl_device)=gpu_input1;
array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange);
input5.setRandom();
const size_t input5TensorBuffSize =input5.size()*sizeof(DataType);
DataType* gpu_data_input5 = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
for (int j = 0; j < sizeDim2; ++j) {
for (int k = 0; k < sizeDim3; ++k) {
for (int l = 0; l < sizeDim4; ++l) {
for (int m = 0; m < sizeDim5; ++m) {
if (l != 4) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
}
}
}
}
}
}
gpu_tensor.device(sycl_device)=gpu_input1;
array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange);
input6.setRandom();
const size_t input6TensorBuffSize =input6.size()*sizeof(DataType);
DataType* gpu_data_input6 = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange);
sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
for (int j = 0; j < sizeDim2; ++j) {
for (int k = 0; k < sizeDim3; ++k) {
for (int l = 0; l < sizeDim4; ++l) {
for (int m = 0; m < sizeDim5; ++m) {
if (m != 5) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
}
}
}
}
}
}
gpu_tensor.device(sycl_device)=gpu_input1;
Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange);
input7.setRandom();
DataType* gpu_data_input7 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l);
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
for (int j = 0; j < sizeDim2; ++j) {
for (int k = 0; k < sizeDim3; ++k) {
for (int l = 0; l < sizeDim4; ++l) {
for (int m = 0; m < sizeDim5; ++m) {
if (i != 0) {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
} else {
VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
}
}
}
}
}
}
sycl_device.deallocate(gpu_data_tensor);
sycl_device.deallocate(gpu_data_input1);
sycl_device.deallocate(gpu_data_input2);
sycl_device.deallocate(gpu_data_input3);
sycl_device.deallocate(gpu_data_input4);
sycl_device.deallocate(gpu_data_input5);
sycl_device.deallocate(gpu_data_input6);
sycl_device.deallocate(gpu_data_input7);
}
template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
/* test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device);
test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device);*/
test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device);
// test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_chipping_sycl)
{
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_chipping_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,84 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_orderings()
{
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3> mat2(2,3,7);
Tensor<bool, 3> lt(2,3,7);
Tensor<bool, 3> le(2,3,7);
Tensor<bool, 3> gt(2,3,7);
Tensor<bool, 3> ge(2,3,7);
mat1.setRandom();
mat2.setRandom();
lt = mat1 < mat2;
le = mat1 <= mat2;
gt = mat1 > mat2;
ge = mat1 >= mat2;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(lt(i,j,k), mat1(i,j,k) < mat2(i,j,k));
VERIFY_IS_EQUAL(le(i,j,k), mat1(i,j,k) <= mat2(i,j,k));
VERIFY_IS_EQUAL(gt(i,j,k), mat1(i,j,k) > mat2(i,j,k));
VERIFY_IS_EQUAL(ge(i,j,k), mat1(i,j,k) >= mat2(i,j,k));
}
}
}
}
static void test_equality()
{
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3> mat2(2,3,7);
mat1.setRandom();
mat2.setRandom();
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
if (internal::random<bool>()) {
mat2(i,j,k) = mat1(i,j,k);
}
}
}
}
Tensor<bool, 3> eq(2,3,7);
Tensor<bool, 3> ne(2,3,7);
eq = (mat1 == mat2);
ne = (mat1 != mat2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(eq(i,j,k), mat1(i,j,k) == mat2(i,j,k));
VERIFY_IS_EQUAL(ne(i,j,k), mat1(i,j,k) != mat2(i,j,k));
}
}
}
}
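// Copying mat1 into mat2 at random positions above guarantees a mix of
// bit-identical and (almost surely) differing entries, so both the == and
// != branches are exercised deterministically rather than relying on
// random collisions.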
EIGEN_DECLARE_TEST(cxx11_tensor_comparisons)
{
CALL_SUBTEST(test_orderings());
CALL_SUBTEST(test_equality());
}

View File

@@ -0,0 +1,102 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<typename T>
void test_cuda_complex_cwise_ops() {
const int kNumItems = 2;
std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
std::complex<T>* d_in1;
std::complex<T>* d_in2;
std::complex<T>* d_out;
cudaMalloc((void**)(&d_in1), complex_bytes);
cudaMalloc((void**)(&d_in2), complex_bytes);
cudaMalloc((void**)(&d_out), complex_bytes);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
d_in1, kNumItems);
Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
d_in2, kNumItems);
Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
d_out, kNumItems);
const std::complex<T> a(3.14f, 2.7f);
const std::complex<T> b(-10.6f, 1.4f);
gpu_in1.device(gpu_device) = gpu_in1.constant(a);
gpu_in2.device(gpu_device) = gpu_in2.constant(b);
enum CwiseOp {
Add = 0,
Sub,
Mul,
Div,
Neg,
NbOps
};
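// NbOps is a sentinel that just counts the entries, letting the loop below
// iterate over every operation; the switch treats it as a no-op.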
Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
for (int op = Add; op < NbOps; op++) {
std::complex<T> expected;
switch (static_cast<CwiseOp>(op)) {
case Add:
gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
expected = a + b;
break;
case Sub:
gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
expected = a - b;
break;
case Mul:
gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
expected = a * b;
break;
case Div:
gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
expected = a / b;
break;
case Neg:
gpu_out.device(gpu_device) = -gpu_in1;
expected = -a;
break;
case NbOps:
break;
}
assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
for (int i = 0; i < kNumItems; ++i) {
VERIFY_IS_APPROX(actual(i), expected);
}
}
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out);
}
EIGEN_DECLARE_TEST(test_cxx11_tensor_complex_cwise_ops)
{
CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
}

View File

@@ -0,0 +1,186 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
void test_cuda_nullary() {
Tensor<std::complex<float>, 1, 0, int> in1(2);
Tensor<std::complex<float>, 1, 0, int> in2(2);
in1.setRandom();
in2.setRandom();
std::size_t float_bytes = in1.size() * sizeof(float);
std::size_t complex_bytes = in1.size() * sizeof(std::complex<float>);
std::complex<float>* d_in1;
std::complex<float>* d_in2;
float* d_out2;
cudaMalloc((void**)(&d_in1), complex_bytes);
cudaMalloc((void**)(&d_in2), complex_bytes);
cudaMalloc((void**)(&d_out2), float_bytes);
cudaMemcpy(d_in1, in1.data(), complex_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_in2, in2.data(), complex_bytes, cudaMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in1(
d_in1, 2);
Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in2(
d_in2, 2);
Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_out2(
d_out2, 2);
gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f));
gpu_out2.device(gpu_device) = gpu_in2.abs();
Tensor<std::complex<float>, 1, 0, int> new1(2);
Tensor<float, 1, 0, int> new2(2);
assert(cudaMemcpyAsync(new1.data(), d_in1, complex_bytes, cudaMemcpyDeviceToHost,
gpu_device.stream()) == cudaSuccess);
assert(cudaMemcpyAsync(new2.data(), d_out2, float_bytes, cudaMemcpyDeviceToHost,
gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
for (int i = 0; i < 2; ++i) {
VERIFY_IS_APPROX(new1(i), std::complex<float>(3.14f, 2.7f));
VERIFY_IS_APPROX(new2(i), std::abs(in2(i)));
}
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out2);
}
static void test_cuda_sum_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
const int num_rows = internal::random<int>(1024, 5*1024);
const int num_cols = internal::random<int>(1024, 5*1024);
Tensor<std::complex<float>, 2> in(num_rows, num_cols);
in.setRandom();
Tensor<std::complex<float>, 0> full_redux;
full_redux = in.sum();
std::size_t in_bytes = in.size() * sizeof(std::complex<float>);
std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>);
std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes));
std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes));
gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols);
TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr);
out_gpu.device(gpu_device) = in_gpu.sum();
Tensor<std::complex<float>, 0> full_redux_gpu;
gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
gpu_device.synchronize();
// Check that the CPU and GPU reductions return the same result.
VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
gpu_device.deallocate(gpu_in_ptr);
gpu_device.deallocate(gpu_out_ptr);
}
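// The full reduction of a rank-2 tensor is a rank-0 tensor, i.e. a single
// scalar read back with operator() and no indices; the mean and product
// variants below follow the same allocate/reduce/copy-back/compare
// pattern.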
static void test_cuda_mean_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
const int num_rows = internal::random<int>(1024, 5*1024);
const int num_cols = internal::random<int>(1024, 5*1024);
Tensor<std::complex<float>, 2> in(num_rows, num_cols);
in.setRandom();
Tensor<std::complex<float>, 0> full_redux;
full_redux = in.mean();
std::size_t in_bytes = in.size() * sizeof(std::complex<float>);
std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>);
std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes));
std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes));
gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols);
TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr);
out_gpu.device(gpu_device) = in_gpu.mean();
Tensor<std::complex<float>, 0> full_redux_gpu;
gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
gpu_device.synchronize();
// Check that the CPU and GPU reductions return the same result.
VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
gpu_device.deallocate(gpu_in_ptr);
gpu_device.deallocate(gpu_out_ptr);
}
static void test_cuda_product_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
const int num_rows = internal::random<int>(1024, 5*1024);
const int num_cols = internal::random<int>(1024, 5*1024);
Tensor<std::complex<float>, 2> in(num_rows, num_cols);
in.setRandom();
Tensor<std::complex<float>, 0> full_redux;
full_redux = in.prod();
std::size_t in_bytes = in.size() * sizeof(std::complex<float>);
std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>);
std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes));
std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes));
gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols);
TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr);
out_gpu.device(gpu_device) = in_gpu.prod();
Tensor<std::complex<float>, 0> full_redux_gpu;
gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
gpu_device.synchronize();
// Check that the CPU and GPU reductions return the same result.
VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
gpu_device.deallocate(gpu_in_ptr);
gpu_device.deallocate(gpu_out_ptr);
}
EIGEN_DECLARE_TEST(test_cxx11_tensor_complex)
{
CALL_SUBTEST(test_cuda_nullary());
CALL_SUBTEST(test_cuda_sum_reductions());
CALL_SUBTEST(test_cuda_mean_reductions());
CALL_SUBTEST(test_cuda_product_reductions());
}

View File

@@ -0,0 +1,143 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<int DataLayout>
static void test_dimension_failures()
{
Tensor<int, 3, DataLayout> left(2, 3, 1);
Tensor<int, 3, DataLayout> right(3, 3, 1);
left.setRandom();
right.setRandom();
// Okay; other dimensions are equal.
Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0);
// Dimension mismatches.
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 1));
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 2));
// Axis > NumDims or < 0.
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 3));
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, -1));
}
template<int DataLayout>
static void test_static_dimension_failure()
{
Tensor<int, 2, DataLayout> left(2, 3);
Tensor<int, 3, DataLayout> right(2, 3, 1);
#ifdef CXX11_TENSOR_CONCATENATION_STATIC_DIMENSION_FAILURE
// Technically compatible, but we static assert that the inputs have same
// NumDims.
Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0);
#endif
// This can be worked around in this case.
Tensor<int, 3, DataLayout> concatenation = left
.reshape(Tensor<int, 3>::Dimensions(2, 3, 1))
.concatenate(right, 0);
Tensor<int, 2, DataLayout> alternative = left
// Clang breaks on {{{}}} with an ambiguous-overload error between the copy
// constructor and the variadic DSize constructor added for
// #ifndef EIGEN_EMULATE_CXX11_META_H.
// Workaround: write either
// Tensor<int, 2>::Dimensions{{2, 3}}
// or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
.concatenate(right.reshape(Tensor<int, 2>::Dimensions(2, 3)), 0);
}
template<int DataLayout>
static void test_simple_concatenation()
{
Tensor<int, 3, DataLayout> left(2, 3, 1);
Tensor<int, 3, DataLayout> right(2, 3, 1);
left.setRandom();
right.setRandom();
Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0);
VERIFY_IS_EQUAL(concatenation.dimension(0), 4);
VERIFY_IS_EQUAL(concatenation.dimension(1), 3);
VERIFY_IS_EQUAL(concatenation.dimension(2), 1);
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 2; ++i) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
}
for (int i = 2; i < 4; ++i) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i - 2, j, 0));
}
}
concatenation = left.concatenate(right, 1);
VERIFY_IS_EQUAL(concatenation.dimension(0), 2);
VERIFY_IS_EQUAL(concatenation.dimension(1), 6);
VERIFY_IS_EQUAL(concatenation.dimension(2), 1);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
}
for (int j = 3; j < 6; ++j) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i, j - 3, 0));
}
}
concatenation = left.concatenate(right, 2);
VERIFY_IS_EQUAL(concatenation.dimension(0), 2);
VERIFY_IS_EQUAL(concatenation.dimension(1), 3);
VERIFY_IS_EQUAL(concatenation.dimension(2), 2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
VERIFY_IS_EQUAL(concatenation(i, j, 1), right(i, j, 0));
}
}
}
// TODO(phli): Add test once we have a real vectorized implementation.
// static void test_vectorized_concatenation() {}
static void test_concatenation_as_lvalue()
{
Tensor<int, 2> t1(2, 3);
Tensor<int, 2> t2(2, 3);
t1.setRandom();
t2.setRandom();
Tensor<int, 2> result(4, 3);
result.setRandom();
t1.concatenate(t2, 0) = result;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(t1(i, j), result(i, j));
VERIFY_IS_EQUAL(t2(i, j), result(i+2, j));
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_concatenation)
{
CALL_SUBTEST(test_dimension_failures<ColMajor>());
CALL_SUBTEST(test_dimension_failures<RowMajor>());
CALL_SUBTEST(test_static_dimension_failure<ColMajor>());
CALL_SUBTEST(test_static_dimension_failure<RowMajor>());
CALL_SUBTEST(test_simple_concatenation<ColMajor>());
CALL_SUBTEST(test_simple_concatenation<RowMajor>());
// CALL_SUBTEST(test_vectorized_concatenation());
CALL_SUBTEST(test_concatenation_as_lvalue());
}
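The axis-0 checks above boil down to a simple index split: rows below the left operand's extent read from `left`, the rest read from `right` shifted back by that extent. A standalone sketch of that mapping for a row-major rank-2 layout (hypothetical helper, not Eigen code):

#include <cstddef>

// Element (i, j) of concat(left, right, /*axis=*/0), where left is l0 x d and
// right is r0 x d, both stored row-major.
template <typename T>
T concat_axis0_at(const T* left, const T* right,
                  std::size_t l0, std::size_t d,
                  std::size_t i, std::size_t j) {
  return (i < l0) ? left[i * d + j] : right[(i - l0) * d + j];
}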

View File

@@ -0,0 +1,180 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<typename DataType, int DataLayout, typename IndexType>
static void test_simple_concatenation(const Eigen::SyclDevice& sycl_device)
{
IndexType leftDim1 = 2;
IndexType leftDim2 = 3;
IndexType leftDim3 = 1;
Eigen::array<IndexType, 3> leftRange = {{leftDim1, leftDim2, leftDim3}};
IndexType rightDim1 = 2;
IndexType rightDim2 = 3;
IndexType rightDim3 = 1;
Eigen::array<IndexType, 3> rightRange = {{rightDim1, rightDim2, rightDim3}};
//IndexType concatDim1 = 3;
// IndexType concatDim2 = 3;
// IndexType concatDim3 = 1;
//Eigen::array<IndexType, 3> concatRange = {{concatDim1, concatDim2, concatDim3}};
Tensor<DataType, 3, DataLayout, IndexType> left(leftRange);
Tensor<DataType, 3, DataLayout, IndexType> right(rightRange);
left.setRandom();
right.setRandom();
DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
///
Tensor<DataType, 3, DataLayout, IndexType> concatenation1(leftDim1+rightDim1, leftDim2, leftDim3);
DataType * gpu_out_data1 = static_cast<DataType*>(sycl_device.allocate(concatenation1.dimensions().TotalSize()*sizeof(DataType)));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out1(gpu_out_data1, concatenation1.dimensions());
//concatenation = left.concatenate(right, 0);
gpu_out1.device(sycl_device) = gpu_in1.concatenate(gpu_in2, 0);
sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,(concatenation1.dimensions().TotalSize())*sizeof(DataType));
VERIFY_IS_EQUAL(concatenation1.dimension(0), 4);
VERIFY_IS_EQUAL(concatenation1.dimension(1), 3);
VERIFY_IS_EQUAL(concatenation1.dimension(2), 1);
for (IndexType j = 0; j < 3; ++j) {
for (IndexType i = 0; i < 2; ++i) {
VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0));
}
for (IndexType i = 2; i < 4; ++i) {
VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0));
}
}
sycl_device.deallocate(gpu_out_data1);
Tensor<DataType, 3, DataLayout, IndexType> concatenation2(leftDim1, leftDim2 +rightDim2, leftDim3);
DataType * gpu_out_data2 = static_cast<DataType*>(sycl_device.allocate(concatenation2.dimensions().TotalSize()*sizeof(DataType)));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out2(gpu_out_data2, concatenation2.dimensions());
gpu_out2.device(sycl_device) = gpu_in1.concatenate(gpu_in2, 1);
sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,(concatenation2.dimensions().TotalSize())*sizeof(DataType));
//concatenation = left.concatenate(right, 1);
VERIFY_IS_EQUAL(concatenation2.dimension(0), 2);
VERIFY_IS_EQUAL(concatenation2.dimension(1), 6);
VERIFY_IS_EQUAL(concatenation2.dimension(2), 1);
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0));
}
for (IndexType j = 3; j < 6; ++j) {
VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0));
}
}
sycl_device.deallocate(gpu_out_data2);
Tensor<DataType, 3, DataLayout, IndexType> concatenation3(leftDim1, leftDim2, leftDim3+rightDim3);
DataType * gpu_out_data3 = static_cast<DataType*>(sycl_device.allocate(concatenation3.dimensions().TotalSize()*sizeof(DataType)));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out3(gpu_out_data3, concatenation3.dimensions());
gpu_out3.device(sycl_device) = gpu_in1.concatenate(gpu_in2, 2);
sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,(concatenation3.dimensions().TotalSize())*sizeof(DataType));
//concatenation = left.concatenate(right, 2);
VERIFY_IS_EQUAL(concatenation3.dimension(0), 2);
VERIFY_IS_EQUAL(concatenation3.dimension(1), 3);
VERIFY_IS_EQUAL(concatenation3.dimension(2), 2);
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0));
VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0));
}
}
sycl_device.deallocate(gpu_out_data3);
sycl_device.deallocate(gpu_in1_data);
sycl_device.deallocate(gpu_in2_data);
}
template<typename DataType, int DataLayout, typename IndexType>
static void test_concatenation_as_lvalue(const Eigen::SyclDevice& sycl_device)
{
IndexType leftDim1 = 2;
IndexType leftDim2 = 3;
Eigen::array<IndexType, 2> leftRange = {{leftDim1, leftDim2}};
IndexType rightDim1 = 2;
IndexType rightDim2 = 3;
Eigen::array<IndexType, 2> rightRange = {{rightDim1, rightDim2}};
IndexType concatDim1 = 4;
IndexType concatDim2 = 3;
Eigen::array<IndexType, 2> resRange = {{concatDim1, concatDim2}};
Tensor<DataType, 2, DataLayout, IndexType> left(leftRange);
Tensor<DataType, 2, DataLayout, IndexType> right(rightRange);
Tensor<DataType, 2, DataLayout, IndexType> result(resRange);
left.setRandom();
right.setRandom();
result.setRandom();
DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType)));
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(gpu_out_data, resRange);
sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_out_data, result.data(),(result.dimensions().TotalSize())*sizeof(DataType));
// t1.concatenate(t2, 0) = result;
gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) = gpu_out;
sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data,(left.dimensions().TotalSize())*sizeof(DataType));
sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data,(right.dimensions().TotalSize())*sizeof(DataType));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(left(i, j), result(i, j));
VERIFY_IS_EQUAL(right(i, j), result(i+2, j));
}
}
sycl_device.deallocate(gpu_in1_data);
sycl_device.deallocate(gpu_in2_data);
sycl_device.deallocate(gpu_out_data);
}
template <typename DataType, typename Dev_selector> void tensorConcat_perDevice(Dev_selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device);
test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device);
test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_concatenation_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(tensorConcat_perDevice<float>(device));
}
}
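Both SYCL tests above follow one allocate / copy-in / evaluate / copy-out / deallocate skeleton. A condensed sketch of that pattern, reusing the Eigen::SyclDevice calls from above with a doubling expression as a hypothetical stand-in payload:

template <typename DataType, typename IndexType>
static void device_round_trip(const Eigen::SyclDevice& sycl_device,
                              Tensor<DataType, 3, RowMajor, IndexType>& host_in,
                              Tensor<DataType, 3, RowMajor, IndexType>& host_out) {
  const std::size_t bytes = host_in.size() * sizeof(DataType);
  DataType* d_in = static_cast<DataType*>(sycl_device.allocate(bytes));
  DataType* d_out = static_cast<DataType*>(sycl_device.allocate(bytes));
  Eigen::TensorMap<Eigen::Tensor<DataType, 3, RowMajor, IndexType> > gpu_in(d_in, host_in.dimensions());
  Eigen::TensorMap<Eigen::Tensor<DataType, 3, RowMajor, IndexType> > gpu_out(d_out, host_in.dimensions());
  sycl_device.memcpyHostToDevice(d_in, host_in.data(), bytes);
  gpu_out.device(sycl_device) = gpu_in + gpu_in;  // any tensor expression goes here
  sycl_device.memcpyDeviceToHost(host_out.data(), d_out, bytes);
  sycl_device.deallocate(d_in);
  sycl_device.deallocate(d_out);
}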

View File

@@ -0,0 +1,62 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
static void test_simple_assign()
{
Tensor<int, 3> random(2,3,7);
random.setRandom();
TensorMap<Tensor<const int, 3> > constant(random.data(), 2, 3, 7);
Tensor<int, 3> result(2,3,7);
result = constant;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL((result(i,j,k)), random(i,j,k));
}
}
}
}
static void test_assign_of_const_tensor()
{
Tensor<int, 3> random(2,3,7);
random.setRandom();
TensorMap<Tensor<const int, 3> > constant1(random.data(), 2, 3, 7);
TensorMap<const Tensor<int, 3> > constant2(random.data(), 2, 3, 7);
const TensorMap<Tensor<int, 3> > constant3(random.data(), 2, 3, 7);
Tensor<int, 2> result1 = constant1.chip(0, 2);
Tensor<int, 2> result2 = constant2.chip(0, 2);
Tensor<int, 2> result3 = constant3.chip(0, 2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL((result1(i,j)), random(i,j,0));
VERIFY_IS_EQUAL((result2(i,j)), random(i,j,0));
VERIFY_IS_EQUAL((result3(i,j)), random(i,j,0));
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_const)
{
CALL_SUBTEST(test_simple_assign());
CALL_SUBTEST(test_assign_of_const_tensor());
}
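The property the test exercises is that all three const spellings yield read-only views over the same storage. A small sketch of that behavior (hypothetical helper, same headers as above):

static void const_map_usage(const int* data) {
  TensorMap<Tensor<const int, 3> > view(data, 2, 3, 7);  // element type is const
  Tensor<int, 2> slice = view.chip(0, 2);  // reading through the map is fine
  // view.chip(0, 2) = slice;  // would not compile: the mapped elements are const
  VERIFY_IS_EQUAL(slice(0, 0), data[0]);  // chip(0, 2) keeps dims 0 and 1
}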

View File

@@ -0,0 +1,218 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
void test_gpu_contraction(int m_size, int k_size, int n_size)
{
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
// With these dimensions the output has 300 * 140 elements, more than the
// 30 * 1024 threads that can be resident in blocks on a 15-SM GK110 GPU.
Tensor<float, 2, DataLayout> t_left(m_size, k_size);
Tensor<float, 2, DataLayout> t_right(k_size, n_size);
Tensor<float, 2, DataLayout> t_result(m_size, n_size);
Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size);
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
t_left.setRandom();
t_right.setRandom();
std::size_t t_left_bytes = t_left.size() * sizeof(float);
std::size_t t_right_bytes = t_right.size() * sizeof(float);
std::size_t t_result_bytes = t_result.size() * sizeof(float);
float* d_t_left;
float* d_t_right;
float* d_t_result;
gpuMalloc((void**)(&d_t_left), t_left_bytes);
gpuMalloc((void**)(&d_t_right), t_right_bytes);
gpuMalloc((void**)(&d_t_result), t_result_bytes);
gpuMemcpy(d_t_left, t_left.data(), t_left_bytes, gpuMemcpyHostToDevice);
gpuMemcpy(d_t_right, t_right.data(), t_right_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
gpu_t_left(d_t_left, Eigen::array<int, 2>(m_size, k_size));
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
gpu_t_right(d_t_right, Eigen::array<int, 2>(k_size, n_size));
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
gpu_t_result(d_t_result, Eigen::array<int, 2>(m_size, n_size));
gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
t_result = t_left.contract(t_right, dims);
gpuMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, gpuMemcpyDeviceToHost);
for (DenseIndex i = 0; i < t_result.size(); i++) {
if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) {
continue;
}
if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) {
continue;
}
std::cout << "mismatch detected at index " << i << ": " << t_result(i)
<< " vs " << t_result_gpu(i) << std::endl;
assert(false);
}
gpuFree((void*)d_t_left);
gpuFree((void*)d_t_right);
gpuFree((void*)d_t_result);
}
template<int DataLayout>
void test_scalar(int m_size, int k_size, int n_size)
{
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
// Contracting over both dimensions of each input leaves a rank-0 (scalar)
// result.
Tensor<float, 2, DataLayout> t_left(m_size, k_size);
Tensor<float, 2, DataLayout> t_right(k_size, n_size);
Tensor<float, 0, DataLayout> t_result;
Tensor<float, 0, DataLayout> t_result_gpu;
Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1));
t_left.setRandom();
t_right.setRandom();
std::size_t t_left_bytes = t_left.size() * sizeof(float);
std::size_t t_right_bytes = t_right.size() * sizeof(float);
std::size_t t_result_bytes = sizeof(float);
float* d_t_left;
float* d_t_right;
float* d_t_result;
gpuMalloc((void**)(&d_t_left), t_left_bytes);
gpuMalloc((void**)(&d_t_right), t_right_bytes);
gpuMalloc((void**)(&d_t_result), t_result_bytes);
gpuMemcpy(d_t_left, t_left.data(), t_left_bytes, gpuMemcpyHostToDevice);
gpuMemcpy(d_t_right, t_right.data(), t_right_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
gpu_t_left(d_t_left, m_size, k_size);
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
gpu_t_right(d_t_right, k_size, n_size);
Eigen::TensorMap<Eigen::Tensor<float, 0, DataLayout> >
gpu_t_result(d_t_result);
gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
t_result = t_left.contract(t_right, dims);
gpuMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, gpuMemcpyDeviceToHost);
if (fabs(t_result() - t_result_gpu()) > 1e-4f &&
!Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) {
std::cout << "mismatch detected: " << t_result()
<< " vs " << t_result_gpu() << std::endl;
assert(false);
}
gpuFree((void*)d_t_left);
gpuFree((void*)d_t_right);
gpuFree((void*)d_t_result);
}
template<int DataLayout>
void test_gpu_contraction_m() {
for (int k = 32; k < 256; k++) {
test_gpu_contraction<ColMajor>(k, 128, 128);
test_gpu_contraction<RowMajor>(k, 128, 128);
}
}
template<int DataLayout>
void test_gpu_contraction_k() {
for (int k = 32; k < 256; k++) {
test_gpu_contraction<ColMajor>(128, k, 128);
test_gpu_contraction<RowMajor>(128, k, 128);
}
}
template<int DataLayout>
void test_gpu_contraction_n() {
for (int k = 32; k < 256; k++) {
test_gpu_contraction<ColMajor>(128, 128, k);
test_gpu_contraction<RowMajor>(128, 128, k);
}
}
template<int DataLayout>
void test_gpu_contraction_sizes() {
int m_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257 , 511,
512, 513, 1023, 1024, 1025};
int n_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025};
int k_sizes[] = { 31, 39, 63, 64, 65,
95, 96, 127, 129, 255,
257, 511, 512, 513, 1023,
1024, 1025};
for (int i = 0; i < 15; i++) {
for (int j = 0; j < 15; j++) {
for (int k = 0; k < 17; k++) {
test_gpu_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_contract_gpu)
{
CALL_SUBTEST_1(test_gpu_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST_1(test_gpu_contraction<RowMajor>(128, 128, 128));
CALL_SUBTEST_1(test_scalar<ColMajor>(128, 128, 128));
CALL_SUBTEST_1(test_scalar<RowMajor>(128, 128, 128));
CALL_SUBTEST_2(test_gpu_contraction_m<ColMajor>());
CALL_SUBTEST_3(test_gpu_contraction_m<RowMajor>());
CALL_SUBTEST_4(test_gpu_contraction_k<ColMajor>());
CALL_SUBTEST_5(test_gpu_contraction_k<RowMajor>());
CALL_SUBTEST_6(test_gpu_contraction_n<ColMajor>());
CALL_SUBTEST_7(test_gpu_contraction_n<RowMajor>());
#if !defined(EIGEN_USE_HIP)
// disable these subtests for HIP
CALL_SUBTEST_8(test_gpu_contraction_sizes<ColMajor>());
CALL_SUBTEST_9(test_gpu_contraction_sizes<RowMajor>());
#endif
}
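Both kernels above compare CPU and GPU results with the same two-step test: an absolute-difference check first, then Eigen::internal::isApprox as a relative fallback for large magnitudes. A sketch of that check factored into a helper (hypothetical, not part of the test):

#include <cmath>

// Accept a pair of values if either the absolute or the relative test passes.
static bool close_enough(float reference, float actual, float tol = 1e-4f) {
  if (std::fabs(reference - actual) < tol) return true;      // absolute tolerance
  return Eigen::internal::isApprox(reference, actual, tol);  // relative fallback
}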

File diff suppressed because it is too large

View File

@@ -0,0 +1,601 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::DefaultDevice;
using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
static void test_evals()
{
Tensor<float, 2, DataLayout> mat1(2, 3);
Tensor<float, 2, DataLayout> mat2(2, 3);
Tensor<float, 2, DataLayout> mat3(3, 2);
mat1.setRandom();
mat2.setRandom();
mat3.setRandom();
Tensor<float, 2, DataLayout> mat4(3,3);
mat4.setZero();
Eigen::array<DimPair, 1> dims3 = {{DimPair(0, 0)}};
typedef TensorEvaluator<decltype(mat1.contract(mat2, dims3)), DefaultDevice> Evaluator;
Evaluator eval(mat1.contract(mat2, dims3), DefaultDevice());
eval.evalTo(mat4.data());
EIGEN_STATIC_ASSERT(Evaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
VERIFY_IS_EQUAL(eval.dimensions()[0], 3);
VERIFY_IS_EQUAL(eval.dimensions()[1], 3);
VERIFY_IS_APPROX(mat4(0,0), mat1(0,0)*mat2(0,0) + mat1(1,0)*mat2(1,0));
VERIFY_IS_APPROX(mat4(0,1), mat1(0,0)*mat2(0,1) + mat1(1,0)*mat2(1,1));
VERIFY_IS_APPROX(mat4(0,2), mat1(0,0)*mat2(0,2) + mat1(1,0)*mat2(1,2));
VERIFY_IS_APPROX(mat4(1,0), mat1(0,1)*mat2(0,0) + mat1(1,1)*mat2(1,0));
VERIFY_IS_APPROX(mat4(1,1), mat1(0,1)*mat2(0,1) + mat1(1,1)*mat2(1,1));
VERIFY_IS_APPROX(mat4(1,2), mat1(0,1)*mat2(0,2) + mat1(1,1)*mat2(1,2));
VERIFY_IS_APPROX(mat4(2,0), mat1(0,2)*mat2(0,0) + mat1(1,2)*mat2(1,0));
VERIFY_IS_APPROX(mat4(2,1), mat1(0,2)*mat2(0,1) + mat1(1,2)*mat2(1,1));
VERIFY_IS_APPROX(mat4(2,2), mat1(0,2)*mat2(0,2) + mat1(1,2)*mat2(1,2));
Tensor<float, 2, DataLayout> mat5(2,2);
mat5.setZero();
Eigen::array<DimPair, 1> dims4 = {{DimPair(1, 1)}};
typedef TensorEvaluator<decltype(mat1.contract(mat2, dims4)), DefaultDevice> Evaluator2;
Evaluator2 eval2(mat1.contract(mat2, dims4), DefaultDevice());
eval2.evalTo(mat5.data());
EIGEN_STATIC_ASSERT(Evaluator2::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
VERIFY_IS_EQUAL(eval2.dimensions()[0], 2);
VERIFY_IS_EQUAL(eval2.dimensions()[1], 2);
VERIFY_IS_APPROX(mat5(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(0,1) + mat1(0,2)*mat2(0,2));
VERIFY_IS_APPROX(mat5(0,1), mat1(0,0)*mat2(1,0) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(1,2));
VERIFY_IS_APPROX(mat5(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(0,1) + mat1(1,2)*mat2(0,2));
VERIFY_IS_APPROX(mat5(1,1), mat1(1,0)*mat2(1,0) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(1,2));
Tensor<float, 2, DataLayout> mat6(2,2);
mat6.setZero();
Eigen::array<DimPair, 1> dims6 = {{DimPair(1, 0)}};
typedef TensorEvaluator<decltype(mat1.contract(mat3, dims6)), DefaultDevice> Evaluator3;
Evaluator3 eval3(mat1.contract(mat3, dims6), DefaultDevice());
eval3.evalTo(mat6.data());
EIGEN_STATIC_ASSERT(Evaluator3::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
VERIFY_IS_EQUAL(eval3.dimensions()[0], 2);
VERIFY_IS_EQUAL(eval3.dimensions()[1], 2);
VERIFY_IS_APPROX(mat6(0,0), mat1(0,0)*mat3(0,0) + mat1(0,1)*mat3(1,0) + mat1(0,2)*mat3(2,0));
VERIFY_IS_APPROX(mat6(0,1), mat1(0,0)*mat3(0,1) + mat1(0,1)*mat3(1,1) + mat1(0,2)*mat3(2,1));
VERIFY_IS_APPROX(mat6(1,0), mat1(1,0)*mat3(0,0) + mat1(1,1)*mat3(1,0) + mat1(1,2)*mat3(2,0));
VERIFY_IS_APPROX(mat6(1,1), mat1(1,0)*mat3(0,1) + mat1(1,1)*mat3(1,1) + mat1(1,2)*mat3(2,1));
}
template<int DataLayout>
static void test_scalar()
{
Tensor<float, 1, DataLayout> vec1({6});
Tensor<float, 1, DataLayout> vec2({6});
vec1.setRandom();
vec2.setRandom();
Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}};
Tensor<float, 0, DataLayout> scalar = vec1.contract(vec2, dims);
float expected = 0.0f;
for (int i = 0; i < 6; ++i) {
expected += vec1(i) * vec2(i);
}
VERIFY_IS_APPROX(scalar(), expected);
}
template<int DataLayout>
static void test_multidims()
{
Tensor<float, 3, DataLayout> mat1(2, 2, 2);
Tensor<float, 4, DataLayout> mat2(2, 2, 2, 2);
mat1.setRandom();
mat2.setRandom();
Tensor<float, 3, DataLayout> mat3(2, 2, 2);
mat3.setZero();
Eigen::array<DimPair, 2> dims = {{DimPair(1, 2), DimPair(2, 3)}};
typedef TensorEvaluator<decltype(mat1.contract(mat2, dims)), DefaultDevice> Evaluator;
Evaluator eval(mat1.contract(mat2, dims), DefaultDevice());
eval.evalTo(mat3.data());
EIGEN_STATIC_ASSERT(Evaluator::NumDims==3ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
VERIFY_IS_EQUAL(eval.dimensions()[0], 2);
VERIFY_IS_EQUAL(eval.dimensions()[1], 2);
VERIFY_IS_EQUAL(eval.dimensions()[2], 2);
VERIFY_IS_APPROX(mat3(0,0,0), mat1(0,0,0)*mat2(0,0,0,0) + mat1(0,1,0)*mat2(0,0,1,0) +
mat1(0,0,1)*mat2(0,0,0,1) + mat1(0,1,1)*mat2(0,0,1,1));
VERIFY_IS_APPROX(mat3(0,0,1), mat1(0,0,0)*mat2(0,1,0,0) + mat1(0,1,0)*mat2(0,1,1,0) +
mat1(0,0,1)*mat2(0,1,0,1) + mat1(0,1,1)*mat2(0,1,1,1));
VERIFY_IS_APPROX(mat3(0,1,0), mat1(0,0,0)*mat2(1,0,0,0) + mat1(0,1,0)*mat2(1,0,1,0) +
mat1(0,0,1)*mat2(1,0,0,1) + mat1(0,1,1)*mat2(1,0,1,1));
VERIFY_IS_APPROX(mat3(0,1,1), mat1(0,0,0)*mat2(1,1,0,0) + mat1(0,1,0)*mat2(1,1,1,0) +
mat1(0,0,1)*mat2(1,1,0,1) + mat1(0,1,1)*mat2(1,1,1,1));
VERIFY_IS_APPROX(mat3(1,0,0), mat1(1,0,0)*mat2(0,0,0,0) + mat1(1,1,0)*mat2(0,0,1,0) +
mat1(1,0,1)*mat2(0,0,0,1) + mat1(1,1,1)*mat2(0,0,1,1));
VERIFY_IS_APPROX(mat3(1,0,1), mat1(1,0,0)*mat2(0,1,0,0) + mat1(1,1,0)*mat2(0,1,1,0) +
mat1(1,0,1)*mat2(0,1,0,1) + mat1(1,1,1)*mat2(0,1,1,1));
VERIFY_IS_APPROX(mat3(1,1,0), mat1(1,0,0)*mat2(1,0,0,0) + mat1(1,1,0)*mat2(1,0,1,0) +
mat1(1,0,1)*mat2(1,0,0,1) + mat1(1,1,1)*mat2(1,0,1,1));
VERIFY_IS_APPROX(mat3(1,1,1), mat1(1,0,0)*mat2(1,1,0,0) + mat1(1,1,0)*mat2(1,1,1,0) +
mat1(1,0,1)*mat2(1,1,0,1) + mat1(1,1,1)*mat2(1,1,1,1));
Tensor<float, 2, DataLayout> mat4(2, 2);
Tensor<float, 3, DataLayout> mat5(2, 2, 2);
mat4.setRandom();
mat5.setRandom();
Tensor<float, 1, DataLayout> mat6(2);
mat6.setZero();
Eigen::array<DimPair, 2> dims2({{DimPair(0, 1), DimPair(1, 0)}});
typedef TensorEvaluator<decltype(mat4.contract(mat5, dims2)), DefaultDevice> Evaluator2;
Evaluator2 eval2(mat4.contract(mat5, dims2), DefaultDevice());
eval2.evalTo(mat6.data());
EIGEN_STATIC_ASSERT(Evaluator2::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
VERIFY_IS_EQUAL(eval2.dimensions()[0], 2);
VERIFY_IS_APPROX(mat6(0), mat4(0,0)*mat5(0,0,0) + mat4(1,0)*mat5(0,1,0) +
mat4(0,1)*mat5(1,0,0) + mat4(1,1)*mat5(1,1,0));
VERIFY_IS_APPROX(mat6(1), mat4(0,0)*mat5(0,0,1) + mat4(1,0)*mat5(0,1,1) +
mat4(0,1)*mat5(1,0,1) + mat4(1,1)*mat5(1,1,1));
}
template<int DataLayout>
static void test_holes() {
Tensor<float, 4, DataLayout> t1(2, 5, 7, 3);
Tensor<float, 5, DataLayout> t2(2, 7, 11, 13, 3);
t1.setRandom();
t2.setRandom();
Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(3, 4)}};
Tensor<float, 5, DataLayout> result = t1.contract(t2, dims);
VERIFY_IS_EQUAL(result.dimension(0), 5);
VERIFY_IS_EQUAL(result.dimension(1), 7);
VERIFY_IS_EQUAL(result.dimension(2), 7);
VERIFY_IS_EQUAL(result.dimension(3), 11);
VERIFY_IS_EQUAL(result.dimension(4), 13);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 5; ++l) {
for (int m = 0; m < 5; ++m) {
VERIFY_IS_APPROX(result(i, j, k, l, m),
t1(0, i, j, 0) * t2(0, k, l, m, 0) +
t1(1, i, j, 0) * t2(1, k, l, m, 0) +
t1(0, i, j, 1) * t2(0, k, l, m, 1) +
t1(1, i, j, 1) * t2(1, k, l, m, 1) +
t1(0, i, j, 2) * t2(0, k, l, m, 2) +
t1(1, i, j, 2) * t2(1, k, l, m, 2));
}
}
}
}
}
}
template<int DataLayout>
static void test_full_redux()
{
Tensor<float, 2, DataLayout> t1(2, 2);
Tensor<float, 3, DataLayout> t2(2, 2, 2);
t1.setRandom();
t2.setRandom();
Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}};
Tensor<float, 1, DataLayout> result = t1.contract(t2, dims);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(1, 0, 0)
+ t1(0, 1) * t2(0, 1, 0) + t1(1, 1) * t2(1, 1, 0));
VERIFY_IS_APPROX(result(1), t1(0, 0) * t2(0, 0, 1) + t1(1, 0) * t2(1, 0, 1)
+ t1(0, 1) * t2(0, 1, 1) + t1(1, 1) * t2(1, 1, 1));
dims[0] = DimPair(1, 0);
dims[1] = DimPair(2, 1);
result = t2.contract(t1, dims);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(0, 1, 0)
+ t1(0, 1) * t2(0, 0, 1) + t1(1, 1) * t2(0, 1, 1));
VERIFY_IS_APPROX(result(1), t1(0, 0) * t2(1, 0, 0) + t1(1, 0) * t2(1, 1, 0)
+ t1(0, 1) * t2(1, 0, 1) + t1(1, 1) * t2(1, 1, 1));
}
template<int DataLayout>
static void test_contraction_of_contraction()
{
Tensor<float, 2, DataLayout> t1(2, 2);
Tensor<float, 2, DataLayout> t2(2, 2);
Tensor<float, 2, DataLayout> t3(2, 2);
Tensor<float, 2, DataLayout> t4(2, 2);
t1.setRandom();
t2.setRandom();
t3.setRandom();
t4.setRandom();
Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
auto contract1 = t1.contract(t2, dims);
auto diff = t3 - contract1;
auto contract2 = t1.contract(t4, dims);
Tensor<float, 2, DataLayout> result = contract2.contract(diff, dims);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 2);
Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>>
m1(t1.data(), 2, 2), m2(t2.data(), 2, 2), m3(t3.data(), 2, 2),
m4(t4.data(), 2, 2);
Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>
expected = (m1 * m4) * (m3 - m1 * m2);
VERIFY_IS_APPROX(result(0, 0), expected(0, 0));
VERIFY_IS_APPROX(result(0, 1), expected(0, 1));
VERIFY_IS_APPROX(result(1, 0), expected(1, 0));
VERIFY_IS_APPROX(result(1, 1), expected(1, 1));
}
template<int DataLayout>
static void test_expr()
{
Tensor<float, 2, DataLayout> mat1(2, 3);
Tensor<float, 2, DataLayout> mat2(3, 2);
mat1.setRandom();
mat2.setRandom();
Tensor<float, 2, DataLayout> mat3(2,2);
Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
mat3 = mat1.contract(mat2, dims);
VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0));
VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1));
VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0));
VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1));
}
template<int DataLayout>
static void test_out_of_order_contraction()
{
Tensor<float, 3, DataLayout> mat1(2, 2, 2);
Tensor<float, 3, DataLayout> mat2(2, 2, 2);
mat1.setRandom();
mat2.setRandom();
Tensor<float, 2, DataLayout> mat3(2, 2);
Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(0, 2)}};
mat3 = mat1.contract(mat2, dims);
VERIFY_IS_APPROX(mat3(0, 0),
mat1(0,0,0)*mat2(0,0,0) + mat1(1,0,0)*mat2(0,0,1) +
mat1(0,0,1)*mat2(1,0,0) + mat1(1,0,1)*mat2(1,0,1));
VERIFY_IS_APPROX(mat3(1, 0),
mat1(0,1,0)*mat2(0,0,0) + mat1(1,1,0)*mat2(0,0,1) +
mat1(0,1,1)*mat2(1,0,0) + mat1(1,1,1)*mat2(1,0,1));
VERIFY_IS_APPROX(mat3(0, 1),
mat1(0,0,0)*mat2(0,1,0) + mat1(1,0,0)*mat2(0,1,1) +
mat1(0,0,1)*mat2(1,1,0) + mat1(1,0,1)*mat2(1,1,1));
VERIFY_IS_APPROX(mat3(1, 1),
mat1(0,1,0)*mat2(0,1,0) + mat1(1,1,0)*mat2(0,1,1) +
mat1(0,1,1)*mat2(1,1,0) + mat1(1,1,1)*mat2(1,1,1));
Eigen::array<DimPair, 2> dims2 = {{DimPair(0, 2), DimPair(2, 0)}};
mat3 = mat1.contract(mat2, dims2);
VERIFY_IS_APPROX(mat3(0, 0),
mat1(0,0,0)*mat2(0,0,0) + mat1(1,0,0)*mat2(0,0,1) +
mat1(0,0,1)*mat2(1,0,0) + mat1(1,0,1)*mat2(1,0,1));
VERIFY_IS_APPROX(mat3(1, 0),
mat1(0,1,0)*mat2(0,0,0) + mat1(1,1,0)*mat2(0,0,1) +
mat1(0,1,1)*mat2(1,0,0) + mat1(1,1,1)*mat2(1,0,1));
VERIFY_IS_APPROX(mat3(0, 1),
mat1(0,0,0)*mat2(0,1,0) + mat1(1,0,0)*mat2(0,1,1) +
mat1(0,0,1)*mat2(1,1,0) + mat1(1,0,1)*mat2(1,1,1));
VERIFY_IS_APPROX(mat3(1, 1),
mat1(0,1,0)*mat2(0,1,0) + mat1(1,1,0)*mat2(0,1,1) +
mat1(0,1,1)*mat2(1,1,0) + mat1(1,1,1)*mat2(1,1,1));
}
template<int DataLayout>
static void test_consistency()
{
// this effectively tests that (A*B)^T = (B^T * A^T)
Tensor<float, 3, DataLayout> mat1(4, 3, 5);
Tensor<float, 5, DataLayout> mat2(3, 2, 1, 5, 4);
mat1.setRandom();
mat2.setRandom();
Tensor<float, 4, DataLayout> mat3(5, 2, 1, 5);
Tensor<float, 4, DataLayout> mat4(2, 1, 5, 5);
// contract on dimensions of size 4 and 3
Eigen::array<DimPair, 2> dims1 = {{DimPair(0, 4), DimPair(1, 0)}};
Eigen::array<DimPair, 2> dims2 = {{DimPair(4, 0), DimPair(0, 1)}};
mat3 = mat1.contract(mat2, dims1);
mat4 = mat2.contract(mat1, dims2);
// check that these are equal except for ordering of dimensions
if (DataLayout == ColMajor) {
for (size_t i = 0; i < 5; i++) {
for (size_t j = 0; j < 10; j++) {
VERIFY_IS_APPROX(mat3.data()[i + 5 * j], mat4.data()[j + 10 * i]);
}
}
} else {
// Row major
for (size_t i = 0; i < 5; i++) {
for (size_t j = 0; j < 10; j++) {
VERIFY_IS_APPROX(mat3.data()[10 * i + j], mat4.data()[i + 5 * j]);
}
}
}
}
template<int DataLayout>
static void test_large_contraction()
{
Tensor<float, 4, DataLayout> t_left(30, 50, 8, 31);
Tensor<float, 5, DataLayout> t_right(8, 31, 7, 20, 10);
Tensor<float, 5, DataLayout> t_result(30, 50, 7, 20, 10);
t_left.setRandom();
t_right.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1.0f);
t_right += t_right.constant(1.0f);
typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 1500, 248);
MapXf m_right(t_right.data(), 248, 1400);
Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}};
// compute results by separate methods
t_result = t_left.contract(t_right, dims);
m_result = m_left * m_right;
for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]);
VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
}
}
template<int DataLayout>
static void test_matrix_vector()
{
Tensor<float, 2, DataLayout> t_left(30, 50);
Tensor<float, 1, DataLayout> t_right(50);
Tensor<float, 1, DataLayout> t_result(30);
t_left.setRandom();
t_right.setRandom();
typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 30, 50);
MapXf m_right(t_right.data(), 50, 1);
Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(30, 1);
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 1> dims{{DimPair(1, 0)}};
// compute results by separate methods
t_result = t_left.contract(t_right, dims);
m_result = m_left * m_right;
for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY(internal::isApprox(t_result(i), m_result(i, 0), 1));
}
}
template<int DataLayout>
static void test_tensor_vector()
{
Tensor<float, 3, DataLayout> t_left(7, 13, 17);
Tensor<float, 2, DataLayout> t_right(1, 7);
t_left.setRandom();
t_right.setRandom();
typedef typename Tensor<float, 1, DataLayout>::DimensionPair DimensionPair;
Eigen::array<DimensionPair, 1> dim_pair01{{{0, 1}}};
Tensor<float, 3, DataLayout> t_result = t_left.contract(t_right, dim_pair01);
typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 7, 13*17);
MapXf m_right(t_right.data(), 1, 7);
Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result = m_left.transpose() * m_right.transpose();
for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY(internal::isApprox(t_result(i), m_result(i, 0), 1));
}
}
template<int DataLayout>
static void test_small_blocking_factors()
{
Tensor<float, 4, DataLayout> t_left(30, 5, 3, 31);
Tensor<float, 5, DataLayout> t_right(3, 31, 7, 20, 1);
t_left.setRandom();
t_right.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1.0f);
t_right += t_right.constant(1.0f);
// Force the cache sizes, which results in smaller blocking factors.
Eigen::setCpuCacheSizes(896, 1920, 2944);
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}};
Tensor<float, 5, DataLayout> t_result;
t_result = t_left.contract(t_right, dims);
// compute result using a simple eigen matrix product
Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> m_left(t_left.data(), 150, 93);
Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> m_right(t_right.data(), 93, 140);
Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result = m_left * m_right;
for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
}
}
template<int DataLayout>
static void test_tensor_product()
{
Tensor<float, 2, DataLayout> mat1(2, 3);
Tensor<float, 2, DataLayout> mat2(4, 1);
mat1.setRandom();
mat2.setRandom();
Eigen::array<DimPair, 0> dims;
Tensor<float, 4, DataLayout> result = mat1.contract(mat2, dims);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 3);
VERIFY_IS_EQUAL(result.dimension(2), 4);
VERIFY_IS_EQUAL(result.dimension(3), 1);
for (int i = 0; i < result.dimension(0); ++i) {
for (int j = 0; j < result.dimension(1); ++j) {
for (int k = 0; k < result.dimension(2); ++k) {
for (int l = 0; l < result.dimension(3); ++l) {
VERIFY_IS_APPROX(result(i, j, k, l), mat1(i, j) * mat2(k, l) );
}
}
}
}
}
template<int DataLayout>
static void test_const_inputs()
{
Tensor<float, 2, DataLayout> in1(2, 3);
Tensor<float, 2, DataLayout> in2(3, 2);
in1.setRandom();
in2.setRandom();
TensorMap<Tensor<const float, 2, DataLayout> > mat1(in1.data(), 2, 3);
TensorMap<Tensor<const float, 2, DataLayout> > mat2(in2.data(), 3, 2);
Tensor<float, 2, DataLayout> mat3(2,2);
Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
mat3 = mat1.contract(mat2, dims);
VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0));
VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1));
VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0));
VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1));
}
// Apply Sqrt to all output elements.
struct SqrtOutputKernel {
template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator()(
const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
const TensorContractionParams&, Index, Index, Index num_rows,
Index num_cols) const {
for (int i = 0; i < num_rows; ++i) {
for (int j = 0; j < num_cols; ++j) {
output_mapper(i, j) = std::sqrt(output_mapper(i, j));
}
}
}
};
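// How the kernel above is driven (a summary of the signature, not extra test
// code): the contraction evaluator appears to invoke the kernel once per
// finished output block. output_mapper(i, j) addresses that block with
// block-local coordinates, the two unnamed Index arguments carry the block's
// global row/column offsets, and num_rows/num_cols give its extent. This lets
// a cheap elementwise epilogue (here: sqrt) run as the blocks are produced,
// with no second pass over the full result.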
template <int DataLayout>
static void test_large_contraction_with_output_kernel() {
Tensor<float, 4, DataLayout> t_left(30, 50, 8, 31);
Tensor<float, 5, DataLayout> t_right(8, 31, 7, 20, 10);
Tensor<float, 5, DataLayout> t_result(30, 50, 7, 20, 10);
t_left.setRandom();
t_right.setRandom();
// Put trash in t_result to verify contraction clears output memory.
t_result.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1.0f);
t_right += t_right.constant(1.0f);
typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 1500, 248);
MapXf m_right(t_right.data(), 248, 1400);
Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
// compute results by separate methods
t_result = t_left.contract(t_right, dims, SqrtOutputKernel());
m_result = m_left * m_right;
for (std::ptrdiff_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]);
VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_contraction)
{
CALL_SUBTEST_1(test_evals<ColMajor>());
CALL_SUBTEST_1(test_evals<RowMajor>());
CALL_SUBTEST_1(test_scalar<ColMajor>());
CALL_SUBTEST_1(test_scalar<RowMajor>());
CALL_SUBTEST_2(test_multidims<ColMajor>());
CALL_SUBTEST_2(test_multidims<RowMajor>());
CALL_SUBTEST_2(test_holes<ColMajor>());
CALL_SUBTEST_2(test_holes<RowMajor>());
CALL_SUBTEST_3(test_full_redux<ColMajor>());
CALL_SUBTEST_3(test_full_redux<RowMajor>());
CALL_SUBTEST_3(test_contraction_of_contraction<ColMajor>());
CALL_SUBTEST_3(test_contraction_of_contraction<RowMajor>());
CALL_SUBTEST_4(test_expr<ColMajor>());
CALL_SUBTEST_4(test_expr<RowMajor>());
CALL_SUBTEST_4(test_out_of_order_contraction<ColMajor>());
CALL_SUBTEST_4(test_out_of_order_contraction<RowMajor>());
CALL_SUBTEST_5(test_consistency<ColMajor>());
CALL_SUBTEST_5(test_consistency<RowMajor>());
CALL_SUBTEST_5(test_large_contraction<ColMajor>());
CALL_SUBTEST_5(test_large_contraction<RowMajor>());
CALL_SUBTEST_6(test_matrix_vector<ColMajor>());
CALL_SUBTEST_6(test_matrix_vector<RowMajor>());
CALL_SUBTEST_6(test_tensor_vector<ColMajor>());
CALL_SUBTEST_6(test_tensor_vector<RowMajor>());
CALL_SUBTEST_7(test_small_blocking_factors<ColMajor>());
CALL_SUBTEST_7(test_small_blocking_factors<RowMajor>());
CALL_SUBTEST_7(test_tensor_product<ColMajor>());
CALL_SUBTEST_7(test_tensor_product<RowMajor>());
CALL_SUBTEST_8(test_const_inputs<ColMajor>());
CALL_SUBTEST_8(test_const_inputs<RowMajor>());
CALL_SUBTEST_8(test_large_contraction_with_output_kernel<ColMajor>());
CALL_SUBTEST_8(test_large_contraction_with_output_kernel<RowMajor>());
// Force CMake to split this test.
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8
}
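Several tests above lean on one fact: a contraction over trailing dimensions of the left operand and leading dimensions of the right is, in column-major storage, a single flat matrix product. A reduced-size sketch of that equivalence (assuming main.h's VERIFY_IS_APPROX, mirroring test_large_contraction):

static void contraction_equals_matmul_sketch() {
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Tensor<float, 4> lhs(3, 5, 2, 4);  // contract dims 2 and 3 ...
  Tensor<float, 3> rhs(2, 4, 7);     // ... against dims 0 and 1
  lhs.setRandom();
  rhs.setRandom();
  Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}};
  Tensor<float, 3> t = lhs.contract(rhs, dims);  // shape (3, 5, 7)
  // The same numbers as a (3*5) x (2*4) times (2*4) x 7 matrix product.
  Map<Eigen::MatrixXf> m_lhs(lhs.data(), 15, 8);
  Map<Eigen::MatrixXf> m_rhs(rhs.data(), 8, 7);
  Eigen::MatrixXf m = m_lhs * m_rhs;
  for (int i = 0; i < t.size(); ++i) {
    VERIFY_IS_APPROX(t.data()[i], m.data()[i]);
  }
}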

View File

@@ -0,0 +1,150 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::DefaultDevice;
template <int DataLayout>
static void test_evals()
{
Tensor<float, 2, DataLayout> input(3, 3);
Tensor<float, 1, DataLayout> kernel(2);
input.setRandom();
kernel.setRandom();
Tensor<float, 2, DataLayout> result(2,3);
result.setZero();
Eigen::array<Tensor<float, 2>::Index, 1> dims3;
dims3[0] = 0;
typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator;
Evaluator eval(input.convolve(kernel, dims3), DefaultDevice());
eval.evalTo(result.data());
EIGEN_STATIC_ASSERT(Evaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
VERIFY_IS_EQUAL(eval.dimensions()[0], 2);
VERIFY_IS_EQUAL(eval.dimensions()[1], 3);
VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0
VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2
VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4
VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1
VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3
VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
}
template <int DataLayout>
static void test_expr()
{
Tensor<float, 2, DataLayout> input(3, 3);
Tensor<float, 2, DataLayout> kernel(2, 2);
input.setRandom();
kernel.setRandom();
Tensor<float, 2, DataLayout> result(2,2);
Eigen::array<ptrdiff_t, 2> dims;
dims[0] = 0;
dims[1] = 1;
result = input.convolve(kernel, dims);
VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) +
input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1));
VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) +
input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1));
VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) +
input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1));
VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) +
input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
}
template <int DataLayout>
static void test_modes() {
Tensor<float, 1, DataLayout> input(3);
Tensor<float, 1, DataLayout> kernel(3);
input(0) = 1.0f;
input(1) = 2.0f;
input(2) = 3.0f;
kernel(0) = 0.5f;
kernel(1) = 1.0f;
kernel(2) = 0.0f;
Eigen::array<ptrdiff_t, 1> dims;
dims[0] = 0;
Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 1> padding;
// Emulate VALID mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(0, 0);
Tensor<float, 1, DataLayout> valid(1);
valid = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(valid.dimension(0), 1);
VERIFY_IS_APPROX(valid(0), 2.5f);
// Emulate SAME mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(1, 1);
Tensor<float, 1, DataLayout> same(3);
same = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(same.dimension(0), 3);
VERIFY_IS_APPROX(same(0), 1.0f);
VERIFY_IS_APPROX(same(1), 2.5f);
VERIFY_IS_APPROX(same(2), 4.0f);
// Emulate FULL mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(2, 2);
Tensor<float, 1, DataLayout> full(5);
full = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(full.dimension(0), 5);
VERIFY_IS_APPROX(full(0), 0.0f);
VERIFY_IS_APPROX(full(1), 1.0f);
VERIFY_IS_APPROX(full(2), 2.5f);
VERIFY_IS_APPROX(full(3), 4.0f);
VERIFY_IS_APPROX(full(4), 1.5f);
}
template <int DataLayout>
static void test_strides() {
Tensor<float, 1, DataLayout> input(13);
Tensor<float, 1, DataLayout> kernel(3);
input.setRandom();
kernel.setRandom();
Eigen::array<ptrdiff_t, 1> dims;
dims[0] = 0;
Eigen::array<ptrdiff_t, 1> stride_of_3;
stride_of_3[0] = 3;
Eigen::array<ptrdiff_t, 1> stride_of_2;
stride_of_2[0] = 2;
Tensor<float, 1, DataLayout> result;
result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
input(6)*kernel(2)));
VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
input(12)*kernel(2)));
}
EIGEN_DECLARE_TEST(cxx11_tensor_convolution)
{
CALL_SUBTEST(test_evals<ColMajor>());
CALL_SUBTEST(test_evals<RowMajor>());
CALL_SUBTEST(test_expr<ColMajor>());
CALL_SUBTEST(test_expr<RowMajor>());
CALL_SUBTEST(test_modes<ColMajor>());
CALL_SUBTEST(test_modes<RowMajor>());
CALL_SUBTEST(test_strides<ColMajor>());
CALL_SUBTEST(test_strides<RowMajor>());
}
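The expected values in test_modes follow from the correlation-style definition the tests rely on: output(i) is the sum over j of padded_input(i + j) * kernel(j). A self-contained sketch (plain C++, standard library only) that reproduces all three paddings:

#include <cstddef>
#include <vector>

// 1-D convolve over an input zero-padded by `pad` elements on each side.
static std::vector<float> convolve1d(const std::vector<float>& in,
                                     const std::vector<float>& k,
                                     std::size_t pad) {
  const std::size_t out_size = in.size() + 2 * pad - k.size() + 1;
  std::vector<float> out(out_size, 0.0f);
  for (std::size_t i = 0; i < out_size; ++i) {
    for (std::size_t j = 0; j < k.size(); ++j) {
      const std::ptrdiff_t src =
          static_cast<std::ptrdiff_t>(i + j) - static_cast<std::ptrdiff_t>(pad);
      if (src >= 0 && src < static_cast<std::ptrdiff_t>(in.size()))
        out[i] += in[src] * k[j];  // taps outside the input read zero
    }
  }
  return out;
}
// convolve1d({1,2,3}, {0.5,1,0}, 0) == {2.5}               (VALID)
// convolve1d({1,2,3}, {0.5,1,0}, 1) == {1, 2.5, 4}         (SAME)
// convolve1d({1,2,3}, {0.5,1,0}, 2) == {0, 1, 2.5, 4, 1.5} (FULL)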

View File

@@ -0,0 +1,469 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include <iostream>
#include <chrono>
#include <ctime>
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <iomanip>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
static const float error_threshold = 1e-4f;
template <typename DataType, int DataLayout, typename IndexType>
static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device)
{
IndexType indim0 = 53;
IndexType indim1 = 55;
IndexType indim2 = 51;
IndexType outdim0 = 50;
IndexType outdim1 = 55;
IndexType outdim2 = 51;
Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
Eigen::array<IndexType, 1> kernel_dims = {{4}};
Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
Eigen::array<IndexType, 1> dims3{{0}};
input.setRandom();
kernel.setRandom();
result.setZero();
result_host.setZero();
std::size_t input_bytes = input.size() * sizeof(DataType);
std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
std::size_t result_bytes = result.size() * sizeof(DataType);
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
result_host = input.convolve(kernel, dims3);
for(IndexType i=0; i< outdim0; i++ ){
for(IndexType j=0; j< outdim1; j++ ){
for(IndexType k=0; k< outdim2; k++ ){
if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
assert(false);
}
}
}
}
sycl_device.deallocate(d_input);
sycl_device.deallocate(d_kernel);
sycl_device.deallocate(d_result);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device)
{
IndexType indim0 = 53;
IndexType indim1 = 55;
IndexType indim2 = 51;
IndexType outdim0 = 50;
IndexType outdim1 = 51;
IndexType outdim2 = 51;
Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
Eigen::array<IndexType, 2> kernel_dims = {{4,5}};
Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
Tensor<DataType, 2, DataLayout,IndexType> kernel(kernel_dims);
Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
Eigen::array<IndexType, 2> dims3{{0,1}};
input.setRandom();
kernel.setRandom();
result.setZero();
result_host.setZero();
std::size_t input_bytes = input.size() * sizeof(DataType);
std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
std::size_t result_bytes = result.size() * sizeof(DataType);
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
result_host = input.convolve(kernel, dims3);
for(IndexType i=0; i< outdim0; i++ ){
for(IndexType j=0; j< outdim1; j++ ){
for(IndexType k=0; k< outdim2; k++ ){
if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
assert(false);
}
}
}
}
sycl_device.deallocate(d_input);
sycl_device.deallocate(d_kernel);
sycl_device.deallocate(d_result);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device)
{
IndexType indim0 = 53;
IndexType indim1 = 55;
IndexType indim2 = 51;
IndexType outdim0 = 50;
IndexType outdim1 = 51;
IndexType outdim2 = 49;
Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}};
Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims);
Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
Eigen::array<IndexType, 3> dims3{{0,1,2}};
input.setRandom();
kernel.setRandom();
result.setZero();
result_host.setZero();
std::size_t input_bytes = input.size() * sizeof(DataType);
std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
std::size_t result_bytes = result.size() * sizeof(DataType);
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
result_host = input.convolve(kernel, dims3);
for(IndexType i=0; i< outdim0; i++ ){
for(IndexType j=0; j< outdim1; j++ ){
for(IndexType k=0; k< outdim2; k++ ){
if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
assert(false);
}
}
}
}
sycl_device.deallocate(d_input);
sycl_device.deallocate(d_kernel);
sycl_device.deallocate(d_result);
}
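// Shape sanity check for the three variants above: an unpadded convolve
// shrinks each convolved dimension by (kernel extent - 1), i.e.
// outdim = indim - kdim + 1. Here 53 - 4 + 1 = 50, 55 - 5 + 1 = 51 and
// 51 - 3 + 1 = 49, matching the result_dims used in 1D, 2D and 3D.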
template <typename DataType, int DataLayout, typename IndexType>
static void test_evals(const Eigen::SyclDevice& sycl_device)
{
Eigen::array<IndexType, 2> input_dims = {{3, 3}};
Eigen::array<IndexType, 1> kernel_dims = {{2}};
Eigen::array<IndexType, 2> result_dims = {{2, 3}};
Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
Tensor<DataType, 2, DataLayout,IndexType> result(result_dims);
Eigen::array<IndexType, 1> dims3{{0}};
input.setRandom();
kernel.setRandom();
result.setZero();
std::size_t input_bytes = input.size() * sizeof(DataType);
std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
std::size_t result_bytes = result.size() * sizeof(DataType);
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims);
sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0
VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2
VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4
VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1
VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3
VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
sycl_device.deallocate(d_input);
sycl_device.deallocate(d_kernel);
sycl_device.deallocate(d_result);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_expr(const Eigen::SyclDevice& sycl_device)
{
Eigen::array<IndexType, 2> input_dims = {{3, 3}};
Eigen::array<IndexType, 2> kernel_dims = {{2, 2}};
Eigen::array<IndexType, 2> result_dims = {{2, 2}};
Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims);
Tensor<DataType, 2, DataLayout, IndexType> result(result_dims);
input.setRandom();
kernel.setRandom();
Eigen::array<IndexType, 2> dims;
dims[0] = 0;
dims[1] = 1;
std::size_t input_bytes = input.size() * sizeof(DataType);
std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
std::size_t result_bytes = result.size() * sizeof(DataType);
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims);
sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims);
sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
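// A 2x2 kernel convolved over both dimensions of a 3x3 input yields a 2x2
// result: each result(i,j) is the kernel-weighted sum over the 2x2 window
// of the input anchored at (i,j).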
VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) +
input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1));
VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) +
input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1));
VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) +
input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1));
VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) +
input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
sycl_device.deallocate(d_input);
sycl_device.deallocate(d_kernel);
sycl_device.deallocate(d_result);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_modes(const Eigen::SyclDevice& sycl_device){
Eigen::array<IndexType, 1> input_dims = {{3}};
Eigen::array<IndexType, 1> kernel_dims = {{3}};
Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
input.setRandom();
kernel.setRandom();
Eigen::array<IndexType, 1> dims;
dims[0] = 0;
input(0) = 1.0f;
input(1) = 2.0f;
input(2) = 3.0f;
kernel(0) = 0.5f;
kernel(1) = 1.0f;
kernel(2) = 0.0f;
Eigen::array<std::pair<IndexType, IndexType>, 1> padding;
// Emulate VALID mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(0, 0);
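// With input {1, 2, 3} and kernel {0.5, 1.0, 0.0}, VALID mode (no padding)
// produces a single output: 1*0.5 + 2*1.0 + 3*0.0 = 2.5.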
Tensor<DataType, 1, DataLayout, IndexType> valid(1);
std::size_t input_bytes = input.size() * sizeof(DataType);
std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
std::size_t valid_bytes = valid.size() * sizeof(DataType);
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
DataType * d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions());
sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes);
VERIFY_IS_EQUAL(valid.dimension(0), 1);
VERIFY_IS_APPROX(valid(0), 2.5f);
// Emulate SAME mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(1, 1);
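// Padding one element on each side gives {0, 1, 2, 3, 0}, so the output has
// the same length as the input:
//   same(0) = 0*0.5 + 1*1.0 + 2*0.0 = 1.0
//   same(1) = 1*0.5 + 2*1.0 + 3*0.0 = 2.5
//   same(2) = 2*0.5 + 3*1.0 + 0*0.0 = 4.0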
Tensor<DataType, 1, DataLayout, IndexType> same(3);
std::size_t same_bytes = same.size() * sizeof(DataType);
DataType * d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions());
gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes);
VERIFY_IS_EQUAL(same.dimension(0), 3);
VERIFY_IS_APPROX(same(0), 1.0f);
VERIFY_IS_APPROX(same(1), 2.5f);
VERIFY_IS_APPROX(same(2), 4.0f);
// Emulate FULL mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(2, 2);
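// Padding two elements on each side gives {0, 0, 1, 2, 3, 0, 0}, so every
// partial overlap contributes an output (length = input + kernel - 1 = 5);
// e.g. full(4) = 3*0.5 + 0*1.0 + 0*0.0 = 1.5.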
Tensor<DataType, 1, DataLayout, IndexType> full(5);
std::size_t full_bytes = full.size() * sizeof(DataType);
DataType * d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions());
gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes);
VERIFY_IS_EQUAL(full.dimension(0), 5);
VERIFY_IS_APPROX(full(0), 0.0f);
VERIFY_IS_APPROX(full(1), 1.0f);
VERIFY_IS_APPROX(full(2), 2.5f);
VERIFY_IS_APPROX(full(3), 4.0f);
VERIFY_IS_APPROX(full(4), 1.5f);
sycl_device.deallocate(d_input);
sycl_device.deallocate(d_kernel);
sycl_device.deallocate(d_valid);
sycl_device.deallocate(d_same);
sycl_device.deallocate(d_full);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_strides(const Eigen::SyclDevice& sycl_device){
Eigen::array<IndexType, 1> input_dims = {{13}};
Eigen::array<IndexType, 1> kernel_dims = {{3}};
Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
Tensor<DataType, 1, DataLayout, IndexType> result(2);
input.setRandom();
kernel.setRandom();
Eigen::array<IndexType, 1> dims;
dims[0] = 0;
Eigen::array<IndexType, 1> stride_of_3;
stride_of_3[0] = 3;
Eigen::array<IndexType, 1> stride_of_2;
stride_of_2[0] = 2;
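// Striding the 13-element input by 3 selects elements {0, 3, 6, 9, 12};
// convolving them with the 3-tap kernel yields 3 values, and striding that
// result by 2 keeps the first and third, which the checks below verify.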
std::size_t input_bytes = input.size() * sizeof(DataType);
std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
std::size_t result_bytes = result.size() * sizeof(DataType);
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions());
sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2);
sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
input(6)*kernel(2)));
VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
input(12)*kernel(2)));
sycl_device.deallocate(d_input);
sycl_device.deallocate(d_kernel);
sycl_device.deallocate(d_result);
}
template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){
QueueInterface queueInterface(s);
auto sycl_device=Eigen::SyclDevice(&queueInterface);
test_larg_expr1D<float, RowMajor, int64_t>(sycl_device);
test_larg_expr1D<float, ColMajor, int64_t>(sycl_device);
test_larg_expr2D<float, RowMajor, int64_t>(sycl_device);
test_larg_expr2D<float, ColMajor, int64_t>(sycl_device);
test_larg_expr3D<float, RowMajor, int64_t>(sycl_device);
test_larg_expr3D<float, ColMajor, int64_t>(sycl_device);
test_evals<float, ColMajor, int64_t>(sycl_device);
test_evals<float, RowMajor, int64_t>(sycl_device);
test_expr<float, ColMajor, int64_t>(sycl_device);
test_expr<float, RowMajor, int64_t>(sycl_device);
test_modes<float, ColMajor, int64_t>(sycl_device);
test_modes<float, RowMajor, int64_t>(sycl_device);
test_strides<float, ColMajor, int64_t>(sycl_device);
test_strides<float, RowMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_convolution_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(tensorConvolutionPerDevice(device));
}
}


@@ -0,0 +1,100 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <limits>
#include <map>
#include <Eigen/Dense>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <int DataLayout>
static void test_map_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
using NormalIndex = DSizes<ptrdiff_t, 4>;
using CustomIndex = std::map<ptrdiff_t, ptrdiff_t>;
CustomIndex coeffC;
coeffC[0] = 1;
coeffC[1] = 2;
coeffC[2] = 4;
coeffC[3] = 1;
NormalIndex coeff(1,2,4,1);
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_matrix_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
using NormalIndex = DSizes<ptrdiff_t, 4>;
using CustomIndex = Matrix<unsigned int, 4, 1>;
CustomIndex coeffC(1,2,4,1);
NormalIndex coeff(1,2,4,1);
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_varlist_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
DSizes<ptrdiff_t, 4> coeff(1,2,4,1);
VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_sizes_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
DSizes<ptrdiff_t, 4> coeff(1,2,4,1);
Sizes<1,2,4,1> coeffC;
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}
EIGEN_DECLARE_TEST(cxx11_tensor_custom_index) {
test_map_as_index<ColMajor>();
test_map_as_index<RowMajor>();
test_matrix_as_index<ColMajor>();
test_matrix_as_index<RowMajor>();
test_varlist_as_index<ColMajor>();
test_varlist_as_index<RowMajor>();
test_sizes_as_index<ColMajor>();
test_sizes_as_index<RowMajor>();
}


@@ -0,0 +1,111 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
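// InsertZeros doubles each dimension of its input: the input values are
// written at even (i,j) positions and zeros at odd (i,j) positions; the
// mixed even/odd positions are left untouched and are not checked below.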
struct InsertZeros {
DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const {
DSizes<DenseIndex, 2> result;
result[0] = input.dimension(0) * 2;
result[1] = input.dimension(1) * 2;
return result;
}
template <typename Output, typename Device>
void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const
{
array<DenseIndex, 2> strides;
strides[0] = 2;
strides[1] = 2;
output.stride(strides).device(device) = input;
Eigen::DSizes<DenseIndex, 2> offsets(1,1);
Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1);
output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
}
};
static void test_custom_unary_op()
{
Tensor<float, 2> tensor(3,5);
tensor.setRandom();
Tensor<float, 2> result = tensor.customOp(InsertZeros());
VERIFY_IS_EQUAL(result.dimension(0), 6);
VERIFY_IS_EQUAL(result.dimension(1), 10);
for (int i = 0; i < 6; i+=2) {
for (int j = 0; j < 10; j+=2) {
VERIFY_IS_EQUAL(result(i, j), tensor(i/2, j/2));
}
}
for (int i = 1; i < 6; i+=2) {
for (int j = 1; j < 10; j+=2) {
VERIFY_IS_EQUAL(result(i, j), 0);
}
}
}
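// BatchMatMul treats the last dimension as a batch index: for each slice i
// it contracts input1.chip<2>(i) with input2.chip<2>(i), i.e. a per-slice
// matrix product built from chip() and contract().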
struct BatchMatMul {
DSizes<DenseIndex, 3> dimensions(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2) const {
DSizes<DenseIndex, 3> result;
result[0] = input1.dimension(0);
result[1] = input2.dimension(1);
result[2] = input2.dimension(2);
return result;
}
template <typename Output, typename Device>
void eval(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2,
Output& output, const Device& device) const
{
typedef Tensor<float, 3>::DimensionPair DimPair;
array<DimPair, 1> dims;
dims[0] = DimPair(1, 0);
for (int i = 0; i < output.dimension(2); ++i) {
output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims);
}
}
};
static void test_custom_binary_op()
{
Tensor<float, 3> tensor1(2,3,5);
tensor1.setRandom();
Tensor<float, 3> tensor2(3,7,5);
tensor2.setRandom();
Tensor<float, 3> result = tensor1.customOp(tensor2, BatchMatMul());
for (int i = 0; i < 5; ++i) {
typedef Tensor<float, 3>::DimensionPair DimPair;
array<DimPair, 1> dims;
dims[0] = DimPair(1, 0);
Tensor<float, 2> reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims);
TensorRef<Tensor<float, 2> > val = result.chip<2>(i);
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(val(j, k), reference(j, k));
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_custom_op)
{
CALL_SUBTEST(test_custom_unary_op());
CALL_SUBTEST(test_custom_binary_op());
}


@@ -0,0 +1,170 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<typename TensorType>
struct InsertZeros {
DSizes<DenseIndex, 2> dimensions(const TensorType& input) const {
DSizes<DenseIndex, 2> result;
result[0] = input.dimension(0) * 2;
result[1] = input.dimension(1) * 2;
return result;
}
template <typename Output, typename Device>
void eval(const TensorType& input, Output& output, const Device& device) const
{
array<DenseIndex, 2> strides;
strides[0] = 2;
strides[1] = 2;
output.stride(strides).device(device) = input;
Eigen::DSizes<DenseIndex, 2> offsets(1,1);
Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1);
output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
}
};
template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_unary_op_sycl(const Eigen::SyclDevice &sycl_device)
{
IndexType sizeDim1 = 3;
IndexType sizeDim2 = 5;
Eigen::array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
Eigen::array<IndexType, 2> tensorResultRange = {{6, 10}};
Eigen::Tensor<DataType, 2, DataLayout, IndexType> in1(tensorRange);
Eigen::Tensor<DataType, 2, DataLayout, IndexType> out(tensorResultRange);
DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
typedef Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > TensorType;
TensorType gpu_in1(gpu_in1_data, tensorRange);
TensorType gpu_out(gpu_out_data, tensorResultRange);
in1.setRandom();
sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
gpu_out.device(sycl_device) = gpu_in1.customOp(InsertZeros<TensorType>());
sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
VERIFY_IS_EQUAL(out.dimension(0), 6);
VERIFY_IS_EQUAL(out.dimension(1), 10);
for (int i = 0; i < 6; i+=2) {
for (int j = 0; j < 10; j+=2) {
VERIFY_IS_EQUAL(out(i, j), in1(i/2, j/2));
}
}
for (int i = 1; i < 6; i+=2) {
for (int j = 1; j < 10; j+=2) {
VERIFY_IS_EQUAL(out(i, j), 0);
}
}
sycl_device.deallocate(gpu_in1_data);
sycl_device.deallocate(gpu_out_data);
}
template<typename TensorType>
struct BatchMatMul {
DSizes<DenseIndex, 3> dimensions(const TensorType& input1, const TensorType& input2) const {
DSizes<DenseIndex, 3> result;
result[0] = input1.dimension(0);
result[1] = input2.dimension(1);
result[2] = input2.dimension(2);
return result;
}
template <typename Output, typename Device>
void eval(const TensorType& input1, const TensorType& input2,
Output& output, const Device& device) const
{
typedef typename TensorType::DimensionPair DimPair;
array<DimPair, 1> dims;
dims[0] = DimPair(1, 0);
for (int64_t i = 0; i < output.dimension(2); ++i) {
output.template chip<2>(i).device(device) = input1.template chip<2>(i).contract(input2.template chip<2>(i), dims);
}
}
};
template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_binary_op_sycl(const Eigen::SyclDevice &sycl_device)
{
Eigen::array<IndexType, 3> tensorRange1 = {{2, 3, 5}};
Eigen::array<IndexType, 3> tensorRange2 = {{3,7,5}};
Eigen::array<IndexType, 3> tensorResultRange = {{2, 7, 5}};
Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange1);
Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange2);
Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorResultRange);
DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
typedef Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > TensorType;
TensorType gpu_in1(gpu_in1_data, tensorRange1);
TensorType gpu_in2(gpu_in2_data, tensorRange2);
TensorType gpu_out(gpu_out_data, tensorResultRange);
in1.setRandom();
in2.setRandom();
sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(DataType));
gpu_out.device(sycl_device) = gpu_in1.customOp(gpu_in2, BatchMatMul<TensorType>());
sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
for (IndexType i = 0; i < 5; ++i) {
typedef typename Eigen::Tensor<DataType, 3, DataLayout, IndexType>::DimensionPair DimPair;
array<DimPair, 1> dims;
dims[0] = DimPair(1, 0);
Eigen::Tensor<DataType, 2, DataLayout, IndexType> reference = in1.template chip<2>(i).contract(in2.template chip<2>(i), dims);
TensorRef<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > val = out.template chip<2>(i);
for (IndexType j = 0; j < 2; ++j) {
for (IndexType k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(val(j, k), reference(j, k));
}
}
}
sycl_device.deallocate(gpu_in1_data);
sycl_device.deallocate(gpu_in2_data);
sycl_device.deallocate(gpu_out_data);
}
template <typename DataType, typename Dev_selector> void custom_op_perDevice(Dev_selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_custom_unary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_custom_unary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_custom_binary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_custom_binary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_custom_op_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(custom_op_perDevice<float>(device));
}
}


@@ -0,0 +1,396 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
using Eigen::Tensor;
using Eigen::RowMajor;
// Context for evaluation on cpu
struct CPUContext {
CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out), kernel_1d_(2), kernel_2d_(2,2), kernel_3d_(2,2,2) {
kernel_1d_(0) = 3.14f;
kernel_1d_(1) = 2.7f;
kernel_2d_(0,0) = 3.14f;
kernel_2d_(1,0) = 2.7f;
kernel_2d_(0,1) = 0.2f;
kernel_2d_(1,1) = 7.0f;
kernel_3d_(0,0,0) = 3.14f;
kernel_3d_(0,1,0) = 2.7f;
kernel_3d_(0,0,1) = 0.2f;
kernel_3d_(0,1,1) = 7.0f;
kernel_3d_(1,0,0) = -1.0f;
kernel_3d_(1,1,0) = -0.3f;
kernel_3d_(1,0,1) = -0.7f;
kernel_3d_(1,1,1) = -0.5f;
}
const Eigen::DefaultDevice& device() const { return cpu_device_; }
const Eigen::Tensor<float, 3>& in1() const { return in1_; }
const Eigen::Tensor<float, 3>& in2() const { return in2_; }
Eigen::Tensor<float, 3>& out() { return out_; }
const Eigen::Tensor<float, 1>& kernel1d() const { return kernel_1d_; }
const Eigen::Tensor<float, 2>& kernel2d() const { return kernel_2d_; }
const Eigen::Tensor<float, 3>& kernel3d() const { return kernel_3d_; }
private:
const Eigen::Tensor<float, 3>& in1_;
const Eigen::Tensor<float, 3>& in2_;
Eigen::Tensor<float, 3>& out_;
Eigen::Tensor<float, 1> kernel_1d_;
Eigen::Tensor<float, 2> kernel_2d_;
Eigen::Tensor<float, 3> kernel_3d_;
Eigen::DefaultDevice cpu_device_;
};
// Context for evaluation on GPU
struct GPUContext {
GPUContext(const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1, Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2, Eigen::TensorMap<Eigen::Tensor<float, 3> >& out) : in1_(in1), in2_(in2), out_(out), gpu_device_(&stream_) {
assert(gpuMalloc((void**)(&kernel_1d_), 2*sizeof(float)) == gpuSuccess);
float kernel_1d_val[] = {3.14f, 2.7f};
assert(gpuMemcpy(kernel_1d_, kernel_1d_val, 2*sizeof(float), gpuMemcpyHostToDevice) == gpuSuccess);
assert(gpuMalloc((void**)(&kernel_2d_), 4*sizeof(float)) == gpuSuccess);
float kernel_2d_val[] = {3.14f, 2.7f, 0.2f, 7.0f};
assert(gpuMemcpy(kernel_2d_, kernel_2d_val, 4*sizeof(float), gpuMemcpyHostToDevice) == gpuSuccess);
assert(gpuMalloc((void**)(&kernel_3d_), 8*sizeof(float)) == gpuSuccess);
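// kernel_3d_val lists the coefficients in column-major order, so the 2x2x2
// TensorMap below sees the same values as CPUContext's kernel_3d_.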
float kernel_3d_val[] = {3.14f, -1.0f, 2.7f, -0.3f, 0.2f, -0.7f, 7.0f, -0.5f};
assert(gpuMemcpy(kernel_3d_, kernel_3d_val, 8*sizeof(float), gpuMemcpyHostToDevice) == gpuSuccess);
}
~GPUContext() {
assert(gpuFree(kernel_1d_) == gpuSuccess);
assert(gpuFree(kernel_2d_) == gpuSuccess);
assert(gpuFree(kernel_3d_) == gpuSuccess);
}
const Eigen::GpuDevice& device() const { return gpu_device_; }
const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1() const { return in1_; }
const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2() const { return in2_; }
Eigen::TensorMap<Eigen::Tensor<float, 3> >& out() { return out_; }
Eigen::TensorMap<Eigen::Tensor<float, 1> > kernel1d() const { return Eigen::TensorMap<Eigen::Tensor<float, 1> >(kernel_1d_, 2); }
Eigen::TensorMap<Eigen::Tensor<float, 2> > kernel2d() const { return Eigen::TensorMap<Eigen::Tensor<float, 2> >(kernel_2d_, 2, 2); }
Eigen::TensorMap<Eigen::Tensor<float, 3> > kernel3d() const { return Eigen::TensorMap<Eigen::Tensor<float, 3> >(kernel_3d_, 2, 2, 2); }
private:
const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1_;
const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2_;
Eigen::TensorMap<Eigen::Tensor<float, 3> >& out_;
float* kernel_1d_;
float* kernel_2d_;
float* kernel_3d_;
Eigen::GpuStreamDevice stream_;
Eigen::GpuDevice gpu_device_;
};
// The actual expression to evaluate
template <typename Context>
void test_contextual_eval(Context* context)
{
context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f);
}
template <typename Context>
void test_forced_contextual_eval(Context* context)
{
context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f);
}
template <typename Context>
void test_compound_assignment(Context* context)
{
context->out().device(context->device()) = context->in1().constant(2.718f);
context->out().device(context->device()) += context->in1() + context->in2() * 3.14f;
}
template <typename Context>
void test_contraction(Context* context)
{
Eigen::array<std::pair<int, int>, 2> dims;
dims[0] = std::make_pair(1, 1);
dims[1] = std::make_pair(2, 2);
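// Contracting dims 1 and 2 of both 40x50x70 inputs yields a 40x40 matrix,
// written into the top-left corner of the output viewed as a 40x3500 array.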
Eigen::array<int, 2> shape(40, 50*70);
Eigen::DSizes<int, 2> indices(0,0);
Eigen::DSizes<int, 2> sizes(40,40);
context->out().reshape(shape).slice(indices, sizes).device(context->device()) = context->in1().contract(context->in2(), dims);
}
template <typename Context>
void test_1d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,70);
Eigen::array<int, 1> dims(1);
context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel1d(), dims);
}
template <typename Context>
void test_2d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,69);
Eigen::array<int, 2> dims(1,2);
context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel2d(), dims);
}
template <typename Context>
void test_3d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(39,49,69);
Eigen::array<int, 3> dims(0,1,2);
context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims);
}
void test_cpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70);
in1 = in1.random() + in1.constant(10.0f);
in2 = in2.random() + in2.constant(10.0f);
CPUContext context(in1, in2, out);
test_contextual_eval(&context);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 50; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
}
}
}
test_forced_contextual_eval(&context);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 50; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f);
}
}
}
test_compound_assignment(&context);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 50; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
}
}
}
test_contraction(&context);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 40; ++j) {
const float result = out(i,j,0);
float expected = 0;
for (int k = 0; k < 50; ++k) {
for (int l = 0; l < 70; ++l) {
expected += in1(i, k, l) * in2(j, k, l);
}
}
VERIFY_IS_APPROX(expected, result);
}
}
test_1d_convolution(&context);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 49; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f));
}
}
}
test_2d_convolution(&context);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 49; ++j) {
for (int k = 0; k < 69; ++k) {
const float result = out(i,j,k);
const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) +
(in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f);
if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
continue;
}
VERIFY_IS_APPROX(expected, result);
}
}
}
test_3d_convolution(&context);
for (int i = 0; i < 39; ++i) {
for (int j = 0; j < 49; ++j) {
for (int k = 0; k < 69; ++k) {
const float result = out(i,j,k);
const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f +
in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) +
(in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f +
in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f);
if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
continue;
}
VERIFY_IS_APPROX(expected, result);
}
}
}
}
void test_gpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70);
in1 = in1.random() + in1.constant(10.0f);
in2 = in2.random() + in2.constant(10.0f);
std::size_t in1_bytes = in1.size() * sizeof(float);
std::size_t in2_bytes = in2.size() * sizeof(float);
std::size_t out_bytes = out.size() * sizeof(float);
float* d_in1;
float* d_in2;
float* d_out;
gpuMalloc((void**)(&d_in1), in1_bytes);
gpuMalloc((void**)(&d_in2), in2_bytes);
gpuMalloc((void**)(&d_out), out_bytes);
gpuMemcpy(d_in1, in1.data(), in1_bytes, gpuMemcpyHostToDevice);
gpuMemcpy(d_in2, in2.data(), in2_bytes, gpuMemcpyHostToDevice);
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, 40,50,70);
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, 40,50,70);
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, 40,50,70);
GPUContext context(gpu_in1, gpu_in2, gpu_out);
test_contextual_eval(&context);
assert(gpuMemcpy(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost) == gpuSuccess);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 50; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
}
}
}
test_forced_contextual_eval(&context);
assert(gpuMemcpy(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost) == gpuSuccess);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 50; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f);
}
}
}
test_compound_assignment(&context);
assert(gpuMemcpy(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost) == gpuSuccess);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 50; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
}
}
}
test_contraction(&context);
assert(gpuMemcpy(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost) == gpuSuccess);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 40; ++j) {
const float result = out(i,j,0);
float expected = 0;
for (int k = 0; k < 50; ++k) {
for (int l = 0; l < 70; ++l) {
expected += in1(i, k, l) * in2(j, k, l);
}
}
VERIFY_IS_APPROX(expected, result);
}
}
test_1d_convolution(&context);
assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, context.device().stream()) == gpuSuccess);
assert(gpuStreamSynchronize(context.device().stream()) == gpuSuccess);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 49; ++j) {
for (int k = 0; k < 70; ++k) {
VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f));
}
}
}
test_2d_convolution(&context);
assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, context.device().stream()) == gpuSuccess);
assert(gpuStreamSynchronize(context.device().stream()) == gpuSuccess);
for (int i = 0; i < 40; ++i) {
for (int j = 0; j < 49; ++j) {
for (int k = 0; k < 69; ++k) {
const float result = out(i,j,k);
const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f +
in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f);
VERIFY_IS_APPROX(expected, result);
}
}
}
#if !defined(EIGEN_USE_HIP)
// disable this test on the HIP platform
// 3D tensor convolutions seem to hang on the HIP platform
test_3d_convolution(&context);
assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, context.device().stream()) == gpuSuccess);
assert(gpuStreamSynchronize(context.device().stream()) == gpuSuccess);
for (int i = 0; i < 39; ++i) {
for (int j = 0; j < 49; ++j) {
for (int k = 0; k < 69; ++k) {
const float result = out(i,j,k);
const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f +
in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f +
in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f +
in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f);
VERIFY_IS_APPROX(expected, result);
}
}
}
#endif
}
EIGEN_DECLARE_TEST(cxx11_tensor_device)
{
CALL_SUBTEST_1(test_cpu());
CALL_SUBTEST_2(test_gpu());
}


@@ -0,0 +1,77 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <stdint.h>
#include <iostream>
template <typename DataType, int DataLayout, typename IndexType>
void test_device_memory(const Eigen::SyclDevice &sycl_device) {
std::cout << "Running on : "
<< sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>()
<<std::endl;
IndexType sizeDim1 = 100;
array<IndexType, 1> tensorRange = {{sizeDim1}};
Tensor<DataType, 1, DataLayout,IndexType> in(tensorRange);
Tensor<DataType, 1, DataLayout,IndexType> in1(tensorRange);
memset(in1.data(), 1, in1.size() * sizeof(DataType));
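// Note: memset fills every byte with 0x01, so each float holds a bit
// pattern rather than 1.0f; the device buffer is filled the same way below,
// so the element-wise comparison still holds.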
DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
for (IndexType i=0; i<in.size(); i++) {
VERIFY_IS_EQUAL(in(i), in1(i));
}
sycl_device.deallocate(gpu_in_data);
}
template <typename DataType, int DataLayout, typename IndexType>
void test_device_exceptions(const Eigen::SyclDevice &sycl_device) {
VERIFY(sycl_device.ok());
IndexType sizeDim1 = 100;
array<IndexType, 1> tensorDims = {{sizeDim1}};
DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(sizeDim1*sizeof(DataType)));
sycl_device.memset(gpu_data, 1, sizeDim1*sizeof(DataType));
TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> in(gpu_data, tensorDims);
TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> out(gpu_data, tensorDims);
out.device(sycl_device) = in / in.constant(0);
sycl_device.synchronize();
VERIFY(!sycl_device.ok());
sycl_device.deallocate(gpu_data);
}
template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
QueueInterface queueInterface(d);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_device_memory<DataType, RowMajor, int64_t>(sycl_device);
test_device_memory<DataType, ColMajor, int64_t>(sycl_device);
/// These tests throw an exception; enable them if you want to see it.
//test_device_exceptions<DataType, RowMajor>(sycl_device);
//test_device_exceptions<DataType, ColMajor>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_device_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_device_test_per_device<float>(device));
}
}


@@ -0,0 +1,88 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
static void test_dynamic_size()
{
Eigen::DSizes<int, 3> dimensions(2,3,7);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
VERIFY_IS_EQUAL((int)dimensions[0], 2);
VERIFY_IS_EQUAL((int)dimensions[1], 3);
VERIFY_IS_EQUAL((int)dimensions[2], 7);
}
static void test_fixed_size()
{
Eigen::Sizes<2,3,7> dimensions;
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
}
static void test_match()
{
Eigen::DSizes<unsigned int, 3> dyn((unsigned int)2,(unsigned int)3,(unsigned int)7);
Eigen::Sizes<2,3,7> stat;
VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn, stat), true);
Eigen::DSizes<int, 3> dyn1(2,3,7);
Eigen::DSizes<int, 2> dyn2(2,3);
VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false);
}
static void test_rank_zero()
{
Eigen::Sizes<> scalar;
VERIFY_IS_EQUAL((int)scalar.TotalSize(), 1);
VERIFY_IS_EQUAL((int)scalar.rank(), 0);
VERIFY_IS_EQUAL((int)internal::array_prod(scalar), 1);
Eigen::DSizes<ptrdiff_t, 0> dscalar;
VERIFY_IS_EQUAL((int)dscalar.TotalSize(), 1);
VERIFY_IS_EQUAL((int)dscalar.rank(), 0);
}
static void test_index_type_promotion() {
Eigen::DSizes<int, 3> src0(1, 2, 3);
Eigen::array<int, 3> src1;
src1[0] = 4;
src1[1] = 5;
src1[2] = 6;
Eigen::DSizes<long, 3> dst0(src0);
Eigen::DSizes<long, 3> dst1(src1);
VERIFY_IS_EQUAL(dst0[0], 1L);
VERIFY_IS_EQUAL(dst0[1], 2L);
VERIFY_IS_EQUAL(dst0[2], 3L);
VERIFY_IS_EQUAL(dst1[0], 4L);
VERIFY_IS_EQUAL(dst1[1], 5L);
VERIFY_IS_EQUAL(dst1[2], 6L);
}
EIGEN_DECLARE_TEST(cxx11_tensor_dimension)
{
CALL_SUBTEST(test_dynamic_size());
CALL_SUBTEST(test_fixed_size());
CALL_SUBTEST(test_match());
CALL_SUBTEST(test_rank_zero());
CALL_SUBTEST(test_index_type_promotion());
}


@@ -0,0 +1,40 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
static void test_empty_tensor()
{
Tensor<float, 2> source;
Tensor<float, 2> tgt1 = source;
Tensor<float, 2> tgt2(source);
Tensor<float, 2> tgt3;
tgt3 = tgt1;
tgt3 = tgt2;
}
static void test_empty_fixed_size_tensor()
{
TensorFixedSize<float, Sizes<0> > source;
TensorFixedSize<float, Sizes<0> > tgt1 = source;
TensorFixedSize<float, Sizes<0> > tgt2(source);
TensorFixedSize<float, Sizes<0> > tgt3;
tgt3 = tgt1;
tgt3 = tgt2;
}
EIGEN_DECLARE_TEST(cxx11_tensor_empty)
{
CALL_SUBTEST(test_empty_tensor());
CALL_SUBTEST(test_empty_fixed_size_tensor());
}


@@ -0,0 +1,731 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
using Eigen::ColMajor;
using Eigen::internal::TiledEvaluation;
// A set of tests to verify that different TensorExecutor strategies yield the
// same results for all the ops that support tiled evaluation.
// Default assignment that does not use block evaluation or vectorization.
// We assume that default coefficient evaluation is well tested and correct.
template <typename Dst, typename Expr>
static void DefaultAssign(Dst& dst, Expr expr) {
using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
using Executor =
Eigen::internal::TensorExecutor<const Assign, DefaultDevice,
/*Vectorizable=*/false,
/*Tiling=*/TiledEvaluation::Off>;
Executor::run(Assign(dst, expr), DefaultDevice());
}
// Assignment with specified device and tiling strategy.
template <bool Vectorizable, TiledEvaluation Tiling, typename Device,
typename Dst, typename Expr>
static void DeviceAssign(Device& d, Dst& dst, Expr expr) {
using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
using Executor = Eigen::internal::TensorExecutor<const Assign, Device,
Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
}
template <int NumDims>
static array<Index, NumDims> RandomDims(int min_dim = 1, int max_dim = 20) {
array<Index, NumDims> dims;
for (int i = 0; i < NumDims; ++i) {
dims[i] = internal::random<int>(min_dim, max_dim);
}
return dims;
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_unary_expr(Device d)
{
static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation
// optimization.
auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
Tensor<T, NumDims, Options, Index> src(dims);
Tensor<T, NumDims, Options, Index> dst(dims);
src.setRandom();
const auto expr = src.square();
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
T square = src.coeff(i) * src.coeff(i);
VERIFY_IS_EQUAL(square, dst.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_binary_expr(Device d)
{
static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation
// optimization.
auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
Tensor<T, NumDims, Options, Index> lhs(dims);
Tensor<T, NumDims, Options, Index> rhs(dims);
Tensor<T, NumDims, Options, Index> dst(dims);
lhs.setRandom();
rhs.setRandom();
const auto expr = lhs + rhs;
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
T sum = lhs.coeff(i) + rhs.coeff(i);
VERIFY_IS_EQUAL(sum, dst.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_broadcasting(Device d)
{
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(1, 10);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
const auto broadcasts = RandomDims<NumDims>(1, 7);
const auto expr = src.broadcast(broadcasts);
// We assume that broadcasting on a default device is tested and correct, so
// we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden;
golden = expr;
// Now do the broadcasting using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_chipping_rvalue(Device d)
{
auto dims = RandomDims<NumDims>(1, 10);
Tensor<T, NumDims, Layout, Index> src(dims);
src.setRandom();
#define TEST_CHIPPING(CHIP_DIM) \
if (NumDims > (CHIP_DIM)) { \
const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
const auto expr = src.template chip<(CHIP_DIM)>(offset); \
\
Tensor<T, NumDims - 1, Layout, Index> golden; \
golden = expr; \
\
Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions()); \
\
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; \
using Executor = internal::TensorExecutor<const Assign, Device, \
Vectorizable, Tiling>; \
\
Executor::run(Assign(dst, expr), d); \
\
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
} \
}
TEST_CHIPPING(0)
TEST_CHIPPING(1)
TEST_CHIPPING(2)
TEST_CHIPPING(3)
TEST_CHIPPING(4)
TEST_CHIPPING(5)
#undef TEST_CHIPPING
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_chipping_lvalue(Device d)
{
auto dims = RandomDims<NumDims>(1, 10);
#define TEST_CHIPPING(CHIP_DIM) \
if (NumDims > (CHIP_DIM)) { \
/* Generate random data that we'll assign to the chipped tensor dim. */ \
array<Index, NumDims - 1> src_dims; \
for (int i = 0; i < NumDims - 1; ++i) { \
int dim = i < (CHIP_DIM) ? i : i + 1; \
src_dims[i] = dims[dim]; \
} \
\
Tensor<T, NumDims - 1, Layout, Index> src(src_dims); \
src.setRandom(); \
\
const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
\
Tensor<T, NumDims, Layout, Index> random(dims); \
random.setZero(); \
\
Tensor<T, NumDims, Layout, Index> golden(dims); \
golden = random; \
golden.template chip<(CHIP_DIM)>(offset) = src; \
\
Tensor<T, NumDims, Layout, Index> dst(dims); \
dst = random; \
auto expr = dst.template chip<(CHIP_DIM)>(offset); \
\
using Assign = TensorAssignOp<decltype(expr), const decltype(src)>; \
using Executor = internal::TensorExecutor<const Assign, Device, \
Vectorizable, Tiling>; \
\
Executor::run(Assign(expr, src), d); \
\
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
} \
}
TEST_CHIPPING(0)
TEST_CHIPPING(1)
TEST_CHIPPING(2)
TEST_CHIPPING(3)
TEST_CHIPPING(4)
TEST_CHIPPING(5)
#undef TEST_CHIPPING
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_shuffle_rvalue(Device d)
{
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(1, 10);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
DSizes<Index, NumDims> shuffle;
for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
// Test all possible shuffle permutations.
do {
DSizes<Index, NumDims> shuffled_dims;
for (int i = 0; i < NumDims; ++i) {
shuffled_dims[i] = dims[shuffle[i]];
}
const auto expr = src.shuffle(shuffle);
// We assume that shuffling on a default device is tested and correct, so
// we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
DefaultAssign(golden, expr);
// Now do the shuffling using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
DeviceAssign<Vectorizable, Tiling>(d, dst, expr);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
} while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_shuffle_lvalue(Device d)
{
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(5, 10);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
DSizes<Index, NumDims> shuffle;
for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
// Test all possible shuffle permutations.
do {
DSizes<Index, NumDims> shuffled_dims;
for (int i = 0; i < NumDims; ++i) shuffled_dims[shuffle[i]] = dims[i];
// We assume that shuffling on a default device is tested and correct, so
// we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
auto golden_shuffle = golden.shuffle(shuffle);
DefaultAssign(golden_shuffle, src);
// Now do the shuffling using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
auto dst_shuffle = dst.shuffle(shuffle);
DeviceAssign<Vectorizable, Tiling>(d, dst_shuffle, src);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
} while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_reshape(Device d)
{
static_assert(NumDims >= 2, "NumDims must be greater than or equal to 2");
static constexpr int ReshapedDims = NumDims - 1;
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(5, 10);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
// Merge the first two dimensions and then shuffle.
std::vector<Index> shuffle;
for (int i = 0; i < ReshapedDims; ++i) shuffle.push_back(i);
std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
DSizes<Index, ReshapedDims> reshaped_dims;
reshaped_dims[shuffle[0]] = dims[0] * dims[1];
for (int i = 1; i < ReshapedDims; ++i) reshaped_dims[shuffle[i]] = dims[i + 1];
Tensor<T, ReshapedDims, Options, Index> golden = src.reshape(reshaped_dims);
// Now reshape using configured tensor executor.
Tensor<T, ReshapedDims, Options, Index> dst(golden.dimensions());
auto expr = src.reshape(reshaped_dims);
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_slice_rvalue(Device d)
{
static_assert(NumDims >= 2, "NumDims must be greater than or equal to 2");
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(5, 10);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
// Pick a random slice of src tensor.
auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>());
auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>());
// Make sure that slice start + size do not overflow tensor dims.
for (int i = 0; i < NumDims; ++i) {
slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
}
Tensor<T, NumDims, Options, Index> golden =
src.slice(slice_start, slice_size);
// Now reshape using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
auto expr = src.slice(slice_start, slice_size);
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_slice_lvalue(Device d)
{
static_assert(NumDims >= 2, "NumDims must be greater than or equal to 2");
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(5, 10);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
// Pick a random slice of src tensor.
auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
// Make sure that slice start + size do not overflow tensor dims.
for (int i = 0; i < NumDims; ++i) {
slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
}
Tensor<T, NumDims, Options, Index> slice(slice_size);
slice.setRandom();
// Assign a slice using default executor.
Tensor<T, NumDims, Options, Index> golden = src;
golden.slice(slice_start, slice_size) = slice;
// And using configured execution strategy.
Tensor<T, NumDims, Options, Index> dst = src;
auto expr = dst.slice(slice_start, slice_size);
using Assign = TensorAssignOp<decltype(expr), const decltype(slice)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(expr, slice), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_broadcasting_of_forced_eval(Device d)
{
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(1, 10);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
const auto broadcasts = RandomDims<NumDims>(1, 7);
const auto expr = src.square().eval().broadcast(broadcasts);
// We assume that broadcasting on a default device is tested and correct, so
// we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden;
golden = expr;
// Now do the broadcasting using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template<typename T, int NumDims>
struct DummyGenerator {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T operator()(const array <Index, NumDims>& dims) const {
T result = static_cast<T>(0);
for (int i = 0; i < NumDims; ++i) {
result += static_cast<T>((i + 1) * dims[i]);
}
return result;
}
};
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_generator_op(Device d)
{
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(20, 30);
Tensor<T, NumDims, Options, Index> src(dims);
src.setRandom();
const auto expr = src.generate(DummyGenerator<T, NumDims>());
// We assume that generator on a default device is tested and correct, so
// we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden;
golden = expr;
// Now do the broadcasting using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_execute_reverse_rvalue(Device d)
{
static constexpr int Options = 0 | Layout;
auto dims = RandomDims<NumDims>(1, numext::pow(1000000.0, 1.0 / NumDims));
Tensor <T, NumDims, Options, Index> src(dims);
src.setRandom();
// Reverse a random subset of the dimensions.
Eigen::array<bool, NumDims> reverse;
for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
const auto expr = src.reverse(reverse);
// We assume that reversing on a default device is tested and correct, so
// we can rely on it to verify correctness of tensor executor and tiling.
Tensor <T, NumDims, Options, Index> golden;
golden = expr;
// Now do the reversing using configured tensor executor.
Tensor <T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_async_execute_unary_expr(Device d)
{
static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation
// optimization.
auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
Tensor<T, NumDims, Options, Index> src(dims);
Tensor<T, NumDims, Options, Index> dst(dims);
src.setRandom();
const auto expr = src.square();
Eigen::Barrier done(1);
auto on_done = [&done]() { done.Notify(); };
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using DoneCallback = decltype(on_done);
using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
Vectorizable, Tiling>;
Executor::runAsync(Assign(dst, expr), d, on_done);
done.Wait();
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
T square = src.coeff(i) * src.coeff(i);
VERIFY_IS_EQUAL(square, dst.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout>
static void test_async_execute_binary_expr(Device d)
{
static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation
// optimization.
auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
Tensor<T, NumDims, Options, Index> lhs(dims);
Tensor<T, NumDims, Options, Index> rhs(dims);
Tensor<T, NumDims, Options, Index> dst(dims);
lhs.setRandom();
rhs.setRandom();
const auto expr = lhs + rhs;
Eigen::Barrier done(1);
auto on_done = [&done]() { done.Notify(); };
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
using DoneCallback = decltype(on_done);
using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
Vectorizable, Tiling>;
Executor::runAsync(Assign(dst, expr), d, on_done);
done.Wait();
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
T sum = lhs.coeff(i) + rhs.coeff(i);
VERIFY_IS_EQUAL(sum, dst.coeff(i));
}
}
#ifdef EIGEN_DONT_VECTORIZE
#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
#else
#define VECTORIZABLE(VAL) VAL
#endif
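// If EIGEN_DONT_VECTORIZE is defined (to a nonzero value), this forces the
// Vectorizable flag passed to the executor to false; otherwise VAL is
// passed through unchanged.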
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
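// CALL_SUBTEST_PART(N) pastes into CALL_SUBTEST_N, so each NAME lands in the
// CMake subtest of the matching number. Each invocation instantiates all 16
// combinations of device (Default/ThreadPool), vectorization (off/on),
// tiling (off/on) and layout (ColMajor/RowMajor); for example
// CALL_SUBTEST_COMBINATIONS(1, test_execute_unary_expr, float, 3) expands to
//   CALL_SUBTEST_1((test_execute_unary_expr<float, 3, DefaultDevice, false,
//                   TiledEvaluation::Off, ColMajor>(default_device)));
// and fifteen sibling instantiations.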
// NOTE: Currently only ThreadPoolDevice supports async expression evaluation.
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
Eigen::DefaultDevice default_device;
// Default device is unused in ASYNC tests.
EIGEN_UNUSED_VARIABLE(default_device);
const auto num_threads = internal::random<int>(20, 24);
Eigen::ThreadPool tp(num_threads);
Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
CALL_SUBTEST_COMBINATIONS(1, test_execute_unary_expr, float, 3);
CALL_SUBTEST_COMBINATIONS(1, test_execute_unary_expr, float, 4);
CALL_SUBTEST_COMBINATIONS(1, test_execute_unary_expr, float, 5);
CALL_SUBTEST_COMBINATIONS(2, test_execute_binary_expr, float, 3);
CALL_SUBTEST_COMBINATIONS(2, test_execute_binary_expr, float, 4);
CALL_SUBTEST_COMBINATIONS(2, test_execute_binary_expr, float, 5);
CALL_SUBTEST_COMBINATIONS(3, test_execute_broadcasting, float, 3);
CALL_SUBTEST_COMBINATIONS(3, test_execute_broadcasting, float, 4);
CALL_SUBTEST_COMBINATIONS(3, test_execute_broadcasting, float, 5);
CALL_SUBTEST_COMBINATIONS(4, test_execute_chipping_rvalue, float, 3);
CALL_SUBTEST_COMBINATIONS(4, test_execute_chipping_rvalue, float, 4);
CALL_SUBTEST_COMBINATIONS(4, test_execute_chipping_rvalue, float, 5);
CALL_SUBTEST_COMBINATIONS(5, test_execute_chipping_lvalue, float, 3);
CALL_SUBTEST_COMBINATIONS(5, test_execute_chipping_lvalue, float, 4);
CALL_SUBTEST_COMBINATIONS(5, test_execute_chipping_lvalue, float, 5);
CALL_SUBTEST_COMBINATIONS(6, test_execute_shuffle_rvalue, float, 3);
CALL_SUBTEST_COMBINATIONS(6, test_execute_shuffle_rvalue, float, 4);
CALL_SUBTEST_COMBINATIONS(6, test_execute_shuffle_rvalue, float, 5);
CALL_SUBTEST_COMBINATIONS(7, test_execute_shuffle_lvalue, float, 3);
CALL_SUBTEST_COMBINATIONS(7, test_execute_shuffle_lvalue, float, 4);
CALL_SUBTEST_COMBINATIONS(7, test_execute_shuffle_lvalue, float, 5);
CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 2);
CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 3);
CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 4);
CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 5);
CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 2);
CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 3);
CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 4);
CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 5);
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 2);
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 3);
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 4);
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 5);
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 2);
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 3);
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 4);
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 5);
CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 2);
CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 3);
CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 4);
CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 5);
CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 1);
CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 2);
CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 3);
CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 4);
CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 5);
CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 3);
CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 4);
CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 5);
CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 3);
CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 4);
CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 5);
// Force CMake to split this test.
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16
}

View File

@@ -0,0 +1,464 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <numeric>
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_1d()
{
Tensor<float, 1> vec1(6);
Tensor<float, 1, RowMajor> vec2(6);
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;
vec1(2) = 15.0; vec2(2) = 2.0;
vec1(3) = 16.0; vec2(3) = 3.0;
vec1(4) = 23.0; vec2(4) = 4.0;
vec1(5) = 42.0; vec2(5) = 5.0;
float data3[6];
TensorMap<Tensor<float, 1>> vec3(data3, 6);
vec3 = vec1.sqrt();
float data4[6];
TensorMap<Tensor<float, 1, RowMajor>> vec4(data4, 6);
vec4 = vec2.square();
float data5[6];
TensorMap<Tensor<float, 1, RowMajor>> vec5(data5, 6);
vec5 = vec2.cube();
VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
VERIFY_IS_APPROX(vec3(1), sqrtf(8.0));
VERIFY_IS_APPROX(vec3(2), sqrtf(15.0));
VERIFY_IS_APPROX(vec3(3), sqrtf(16.0));
VERIFY_IS_APPROX(vec3(4), sqrtf(23.0));
VERIFY_IS_APPROX(vec3(5), sqrtf(42.0));
VERIFY_IS_APPROX(vec4(0), 0.0f);
VERIFY_IS_APPROX(vec4(1), 1.0f);
VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f);
VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f);
VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f);
VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f);
VERIFY_IS_APPROX(vec5(0), 0.0f);
VERIFY_IS_APPROX(vec5(1), 1.0f);
VERIFY_IS_APPROX(vec5(2), 2.0f * 2.0f * 2.0f);
VERIFY_IS_APPROX(vec5(3), 3.0f * 3.0f * 3.0f);
VERIFY_IS_APPROX(vec5(4), 4.0f * 4.0f * 4.0f);
VERIFY_IS_APPROX(vec5(5), 5.0f * 5.0f * 5.0f);
vec3 = vec1 + vec2;
VERIFY_IS_APPROX(vec3(0), 4.0f + 0.0f);
VERIFY_IS_APPROX(vec3(1), 8.0f + 1.0f);
VERIFY_IS_APPROX(vec3(2), 15.0f + 2.0f);
VERIFY_IS_APPROX(vec3(3), 16.0f + 3.0f);
VERIFY_IS_APPROX(vec3(4), 23.0f + 4.0f);
VERIFY_IS_APPROX(vec3(5), 42.0f + 5.0f);
}
static void test_2d()
{
float data1[6];
TensorMap<Tensor<float, 2>> mat1(data1, 2, 3);
float data2[6];
TensorMap<Tensor<float, 2, RowMajor>> mat2(data2, 2, 3);
mat1(0,0) = 0.0;
mat1(0,1) = 1.0;
mat1(0,2) = 2.0;
mat1(1,0) = 3.0;
mat1(1,1) = 4.0;
mat1(1,2) = 5.0;
mat2(0,0) = -0.0;
mat2(0,1) = -1.0;
mat2(0,2) = -2.0;
mat2(1,0) = -3.0;
mat2(1,1) = -4.0;
mat2(1,2) = -5.0;
Tensor<float, 2> mat3(2,3);
Tensor<float, 2, RowMajor> mat4(2,3);
mat3 = mat1.abs();
mat4 = mat2.abs();
VERIFY_IS_APPROX(mat3(0,0), 0.0f);
VERIFY_IS_APPROX(mat3(0,1), 1.0f);
VERIFY_IS_APPROX(mat3(0,2), 2.0f);
VERIFY_IS_APPROX(mat3(1,0), 3.0f);
VERIFY_IS_APPROX(mat3(1,1), 4.0f);
VERIFY_IS_APPROX(mat3(1,2), 5.0f);
VERIFY_IS_APPROX(mat4(0,0), 0.0f);
VERIFY_IS_APPROX(mat4(0,1), 1.0f);
VERIFY_IS_APPROX(mat4(0,2), 2.0f);
VERIFY_IS_APPROX(mat4(1,0), 3.0f);
VERIFY_IS_APPROX(mat4(1,1), 4.0f);
VERIFY_IS_APPROX(mat4(1,2), 5.0f);
}
static void test_3d()
{
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3, RowMajor> mat2(2,3,7);
float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
val += 1.0f;
}
}
}
Tensor<float, 3> mat3(2,3,7);
mat3 = mat1 + mat1;
Tensor<float, 3, RowMajor> mat4(2,3,7);
mat4 = mat2 * 3.14f;
Tensor<float, 3> mat5(2,3,7);
mat5 = mat1.inverse().log();
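// Note: Tensor::inverse() is the coefficient-wise reciprocal 1/x (checked
// below against logf(1.0f / val)), not a matrix inverse.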
Tensor<float, 3, RowMajor> mat6(2,3,7);
mat6 = mat2.pow(0.5f) * 3.14f;
Tensor<float, 3> mat7(2,3,7);
mat7 = mat1.cwiseMax(mat5 * 2.0f).exp();
Tensor<float, 3, RowMajor> mat8(2,3,7);
mat8 = (-mat2).exp() * 3.14f;
Tensor<float, 3, RowMajor> mat9(2,3,7);
mat9 = mat2 + 3.14f;
Tensor<float, 3, RowMajor> mat10(2,3,7);
mat10 = mat2 - 3.14f;
Tensor<float, 3, RowMajor> mat11(2,3,7);
mat11 = mat2 / 3.14f;
val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), val + val);
VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f);
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val));
VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f);
VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f)));
VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f);
VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f);
VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f);
VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f);
val += 1.0f;
}
}
}
}
static void test_constants()
{
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> mat3(2,3,7);
float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
val += 1.0f;
}
}
}
mat2 = mat1.constant(3.14f);
mat3 = mat1.cwiseMax(7.3f).exp();
val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat2(i,j,k), 3.14f);
VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f)));
val += 1.0f;
}
}
}
}
static void test_boolean()
{
const int kSize = 31;
Tensor<int, 1> vec(kSize);
std::iota(vec.data(), vec.data() + kSize, 0);
// Test ||.
Tensor<bool, 1> bool1 = vec < vec.constant(1) || vec > vec.constant(4);
for (int i = 0; i < kSize; ++i) {
bool expected = i < 1 || i > 4;
VERIFY_IS_EQUAL(bool1[i], expected);
}
// Test &&, including cast of operand vec.
Tensor<bool, 1> bool2 = vec.cast<bool>() && vec < vec.constant(4);
for (int i = 0; i < kSize; ++i) {
bool expected = bool(i) && i < 4;
VERIFY_IS_EQUAL(bool2[i], expected);
}
// Compilation tests:
// Test Tensor<bool> against results of cast or comparison; verifies that
// CoeffReturnType is set to match Op return type of bool for Unary and Binary
// Ops.
Tensor<bool, 1> bool3 = vec.cast<bool>() && bool2;
bool3 = vec < vec.constant(4) && bool2;
}
static void test_functors()
{
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> mat3(2,3,7);
float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
val += 1.0f;
}
}
}
mat2 = mat1.inverse().unaryExpr(&asinf);
mat3 = mat1.unaryExpr(&tanhf);
val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k)));
VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k)));
val += 1.0f;
}
}
}
}
static void test_type_casting()
{
Tensor<bool, 3> mat1(2,3,7);
Tensor<float, 3> mat2(2,3,7);
Tensor<double, 3> mat3(2,3,7);
mat1.setRandom();
mat2.setRandom();
mat3 = mat1.cast<double>();
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), mat1(i,j,k) ? 1.0 : 0.0);
}
}
}
mat3 = mat2.cast<double>();
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), static_cast<double>(mat2(i,j,k)));
}
}
}
}
static void test_select()
{
Tensor<float, 3> selector(2,3,7);
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> result(2,3,7);
selector.setRandom();
mat1.setRandom();
mat2.setRandom();
result = (selector > selector.constant(0.5f)).select(mat1, mat2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(result(i,j,k), (selector(i,j,k) > 0.5f) ? mat1(i,j,k) : mat2(i,j,k));
}
}
}
}
template <typename Scalar>
void test_minmax_nan_propagation_templ() {
for (int size = 1; size < 17; ++size) {
const Scalar kNaN = std::numeric_limits<Scalar>::quiet_NaN();
const Scalar kInf = std::numeric_limits<Scalar>::infinity();
const Scalar kZero(0);
Tensor<Scalar, 1> vec_all_nan(size);
Tensor<Scalar, 1> vec_one_nan(size);
Tensor<Scalar, 1> vec_zero(size);
vec_all_nan.setConstant(kNaN);
vec_zero.setZero();
vec_one_nan.setZero();
vec_one_nan(size/2) = kNaN;
auto verify_all_nan = [&](const Tensor<Scalar, 1>& v) {
for (int i = 0; i < size; ++i) {
VERIFY((numext::isnan)(v(i)));
}
};
auto verify_all_zero = [&](const Tensor<Scalar, 1>& v) {
for (int i = 0; i < size; ++i) {
VERIFY_IS_EQUAL(v(i), Scalar(0));
}
};
// Test NaN propagating max.
// max(nan, nan) = nan
// max(nan, 0) = nan
// max(0, nan) = nan
// max(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kZero));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_zero));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(vec_zero));
// Test number propagating max.
// max(nan, nan) = nan
// max(nan, 0) = 0
// max(0, nan) = 0
// max(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_zero));
// Test NaN propagating min.
// min(nan, nan) = nan
// min(nan, 0) = nan
// min(0, nan) = nan
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kZero));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_zero));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(vec_zero));
// Test number propagating min.
// min(nan, nan) = nan
// min(nan, 0) = 0
// min(0, nan) = 0
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_zero));
// Test min and max reduction
Tensor<Scalar, 0> val;
val = vec_zero.minimum();
VERIFY_IS_EQUAL(val(), kZero);
val = vec_zero.template minimum<PropagateNaN>();
VERIFY_IS_EQUAL(val(), kZero);
val = vec_zero.template minimum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), kZero);
val = vec_zero.maximum();
VERIFY_IS_EQUAL(val(), kZero);
val = vec_zero.template maximum<PropagateNaN>();
VERIFY_IS_EQUAL(val(), kZero);
val = vec_zero.template maximum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), kZero);
// Test NaN propagation for tensor of all NaNs.
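// With PropagateNumbers the NaNs are skipped, so an all-NaN reduction
// returns the reduction identity: +inf for minimum, -inf for maximum.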
val = vec_all_nan.template minimum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template minimum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), kInf);
val = vec_all_nan.template maximum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template maximum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), -kInf);
// Test NaN propagation for tensor with a single NaN.
val = vec_one_nan.template minimum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_one_nan.template minimum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), (size == 1 ? kInf : kZero));
val = vec_one_nan.template maximum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_one_nan.template maximum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), (size == 1 ? -kInf : kZero));
}
}
static void test_clip()
{
Tensor<float, 1> vec(6);
vec(0) = 4.0;
vec(1) = 8.0;
vec(2) = 15.0;
vec(3) = 16.0;
vec(4) = 23.0;
vec(5) = 42.0;
float kMin = 20;
float kMax = 30;
Tensor<float, 1> vec_clipped(6);
vec_clipped = vec.clip(kMin, kMax);
for (int i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(vec_clipped(i), numext::mini(numext::maxi(vec(i), kMin), kMax));
}
}
static void test_minmax_nan_propagation()
{
test_minmax_nan_propagation_templ<float>();
test_minmax_nan_propagation_templ<double>();
}
EIGEN_DECLARE_TEST(cxx11_tensor_expr)
{
CALL_SUBTEST(test_1d());
CALL_SUBTEST(test_2d());
CALL_SUBTEST(test_3d());
CALL_SUBTEST(test_constants());
CALL_SUBTEST(test_boolean());
CALL_SUBTEST(test_functors());
CALL_SUBTEST(test_type_casting());
CALL_SUBTEST(test_select());
CALL_SUBTEST(test_clip());
// Nan propagation does currently not work like one would expect from std::max/std::min,
// so we disable it for now
#if !EIGEN_ARCH_ARM_OR_ARM64
CALL_SUBTEST(test_minmax_nan_propagation());
#endif
}

View File

@@ -0,0 +1,304 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <int DataLayout>
static void test_fft_2D_golden() {
Tensor<float, 2, DataLayout> input(2, 3);
input(0, 0) = 1;
input(0, 1) = 2;
input(0, 2) = 3;
input(1, 0) = 4;
input(1, 1) = 5;
input(1, 2) = 6;
array<ptrdiff_t, 2> fft;
fft[0] = 0;
fft[1] = 1;
Tensor<std::complex<float>, 2, DataLayout> output = input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
std::complex<float> output_golden[6]; // in ColMajor order
output_golden[0] = std::complex<float>(21, 0);
output_golden[1] = std::complex<float>(-9, 0);
output_golden[2] = std::complex<float>(-3, 1.73205);
output_golden[3] = std::complex<float>( 0, 0);
output_golden[4] = std::complex<float>(-3, -1.73205);
output_golden[5] = std::complex<float>( 0, 0);
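// 1.73205 ~= sqrt(3): the length-3 DFT of the column sums {5, 7, 9} yields
// the entries -3 +/- i*sqrt(3), and the row difference (1+2+3) - (4+5+6)
// gives the -9 entry.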
std::complex<float> c_offset = std::complex<float>(1.0, 1.0);
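// Offsetting both sides keeps the compared values away from zero, where the
// relative-error check in VERIFY_IS_APPROX would otherwise be too strict.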
if (DataLayout == ColMajor) {
VERIFY_IS_APPROX(output(0) + c_offset, output_golden[0] + c_offset);
VERIFY_IS_APPROX(output(1) + c_offset, output_golden[1] + c_offset);
VERIFY_IS_APPROX(output(2) + c_offset, output_golden[2] + c_offset);
VERIFY_IS_APPROX(output(3) + c_offset, output_golden[3] + c_offset);
VERIFY_IS_APPROX(output(4) + c_offset, output_golden[4] + c_offset);
VERIFY_IS_APPROX(output(5) + c_offset, output_golden[5] + c_offset);
}
else {
VERIFY_IS_APPROX(output(0) + c_offset, output_golden[0] + c_offset);
VERIFY_IS_APPROX(output(1) + c_offset, output_golden[2] + c_offset);
VERIFY_IS_APPROX(output(2) + c_offset, output_golden[4] + c_offset);
VERIFY_IS_APPROX(output(3) + c_offset, output_golden[1] + c_offset);
VERIFY_IS_APPROX(output(4) + c_offset, output_golden[3] + c_offset);
VERIFY_IS_APPROX(output(5) + c_offset, output_golden[5] + c_offset);
}
}
static void test_fft_complex_input_golden() {
Tensor<std::complex<float>, 1, ColMajor> input(5);
input(0) = std::complex<float>(1, 1);
input(1) = std::complex<float>(2, 2);
input(2) = std::complex<float>(3, 3);
input(3) = std::complex<float>(4, 4);
input(4) = std::complex<float>(5, 5);
array<ptrdiff_t, 1> fft;
fft[0] = 0;
Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0));
std::complex<float> forward_golden_result[5];
std::complex<float> reverse_golden_result[5];
forward_golden_result[0] = std::complex<float>(15.000000000000000,+15.000000000000000);
forward_golden_result[1] = std::complex<float>(-5.940954801177935, +0.940954801177934);
forward_golden_result[2] = std::complex<float>(-3.312299240582266, -1.687700759417735);
forward_golden_result[3] = std::complex<float>(-1.687700759417735, -3.312299240582266);
forward_golden_result[4] = std::complex<float>( 0.940954801177934, -5.940954801177935);
reverse_golden_result[0] = std::complex<float>( 3.000000000000000, + 3.000000000000000);
reverse_golden_result[1] = std::complex<float>( 0.188190960235587, - 1.188190960235587);
reverse_golden_result[2] = std::complex<float>(-0.337540151883547, - 0.662459848116453);
reverse_golden_result[3] = std::complex<float>(-0.662459848116453, - 0.337540151883547);
reverse_golden_result[4] = std::complex<float>(-1.188190960235587, + 0.188190960235587);
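// The inverse-transform goldens are the forward ones scaled by 1/N (N = 5)
// with the nonzero frequencies index-reversed, e.g.
// reverse[0] == forward[0] / 5 and reverse[1] == forward[4] / 5.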
for(int i = 0; i < 5; ++i) {
VERIFY_IS_APPROX(forward_output_both_parts(i), forward_golden_result[i]);
VERIFY_IS_APPROX(forward_output_real_part(i), forward_golden_result[i].real());
VERIFY_IS_APPROX(forward_output_imag_part(i), forward_golden_result[i].imag());
}
for(int i = 0; i < 5; ++i) {
VERIFY_IS_APPROX(reverse_output_both_parts(i), reverse_golden_result[i]);
VERIFY_IS_APPROX(reverse_output_real_part(i), reverse_golden_result[i].real());
VERIFY_IS_APPROX(reverse_output_imag_part(i), reverse_golden_result[i].imag());
}
}
static void test_fft_real_input_golden() {
Tensor<float, 1, ColMajor> input(5);
input(0) = 1.0;
input(1) = 2.0;
input(2) = 3.0;
input(3) = 4.0;
input(4) = 5.0;
array<ptrdiff_t, 1> fft;
fft[0] = 0;
Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0));
std::complex<float> forward_golden_result[5];
std::complex<float> reverse_golden_result[5];
forward_golden_result[0] = std::complex<float>( 15, 0);
forward_golden_result[1] = std::complex<float>(-2.5, +3.44095480117793);
forward_golden_result[2] = std::complex<float>(-2.5, +0.81229924058227);
forward_golden_result[3] = std::complex<float>(-2.5, -0.81229924058227);
forward_golden_result[4] = std::complex<float>(-2.5, -3.44095480117793);
reverse_golden_result[0] = std::complex<float>( 3.0, 0);
reverse_golden_result[1] = std::complex<float>(-0.5, -0.688190960235587);
reverse_golden_result[2] = std::complex<float>(-0.5, -0.162459848116453);
reverse_golden_result[3] = std::complex<float>(-0.5, +0.162459848116453);
reverse_golden_result[4] = std::complex<float>(-0.5, +0.688190960235587);
std::complex<float> c_offset(1.0, 1.0);
float r_offset = 1.0;
for(int i = 0; i < 5; ++i) {
VERIFY_IS_APPROX(forward_output_both_parts(i) + c_offset, forward_golden_result[i] + c_offset);
VERIFY_IS_APPROX(forward_output_real_part(i) + r_offset, forward_golden_result[i].real() + r_offset);
VERIFY_IS_APPROX(forward_output_imag_part(i) + r_offset, forward_golden_result[i].imag() + r_offset);
}
for(int i = 0; i < 5; ++i) {
VERIFY_IS_APPROX(reverse_output_both_parts(i) + c_offset, reverse_golden_result[i] + c_offset);
VERIFY_IS_APPROX(reverse_output_real_part(i) + r_offset, reverse_golden_result[i].real() + r_offset);
VERIFY_IS_APPROX(reverse_output_imag_part(i) + r_offset, reverse_golden_result[i].imag() + r_offset);
}
}
template <int DataLayout, typename RealScalar, bool isComplexInput, int FFTResultType, int FFTDirection, int TensorRank>
static void test_fft_real_input_energy() {
Eigen::DSizes<ptrdiff_t, TensorRank> dimensions;
ptrdiff_t total_size = 1;
for (int i = 0; i < TensorRank; ++i) {
dimensions[i] = rand() % 20 + 1;
total_size *= dimensions[i];
}
const DSizes<ptrdiff_t, TensorRank> arr = dimensions;
typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar;
Tensor<InputScalar, TensorRank, DataLayout> input;
input.resize(arr);
input.setRandom();
array<ptrdiff_t, TensorRank> fft;
for (int i = 0; i < TensorRank; ++i) {
fft[i] = i;
}
typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar;
Tensor<OutputScalar, TensorRank, DataLayout> output;
output = input.template fft<FFTResultType, FFTDirection>(fft);
for (int i = 0; i < TensorRank; ++i) {
VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i));
}
RealScalar energy_original = 0.0;
RealScalar energy_after_fft = 0.0;
for (int i = 0; i < total_size; ++i) {
energy_original += numext::abs2(input(i));
}
for (int i = 0; i < total_size; ++i) {
energy_after_fft += numext::abs2(output(i));
}
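// Parseval's theorem: an unnormalized forward DFT multiplies the total
// energy by the sample count, while the 1/N-normalized inverse divides it,
// hence the two scalings below.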
if(FFTDirection == FFT_FORWARD) {
VERIFY_IS_APPROX(energy_original, energy_after_fft / total_size);
}
else {
VERIFY_IS_APPROX(energy_original, energy_after_fft * total_size);
}
}
template <typename RealScalar>
static void test_fft_non_power_of_2_round_trip(int exponent) {
int n = (1 << exponent) + 1;
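// n = 2^exponent + 1 is deliberately not a power of two, presumably to
// exercise TensorFFT's non-radix-2 (Bluestein-style) code path.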
Eigen::DSizes<ptrdiff_t, 1> dimensions;
dimensions[0] = n;
const DSizes<ptrdiff_t, 1> arr = dimensions;
Tensor<RealScalar, 1, ColMajor, ptrdiff_t> input;
input.resize(arr);
input.setRandom();
array<int, 1> fft;
fft[0] = 0;
Tensor<std::complex<RealScalar>, 1, ColMajor> forward =
input.template fft<BothParts, FFT_FORWARD>(fft);
Tensor<RealScalar, 1, ColMajor, ptrdiff_t> output =
forward.template fft<RealPart, FFT_REVERSE>(fft);
for (int i = 0; i < n; ++i) {
RealScalar tol = test_precision<RealScalar>() *
(std::abs(input[i]) + std::abs(output[i]) + 1);
VERIFY_IS_APPROX_OR_LESS_THAN(std::abs(input[i] - output[i]), tol);
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_fft) {
test_fft_complex_input_golden();
test_fft_real_input_golden();
test_fft_2D_golden<ColMajor>();
test_fft_2D_golden<RowMajor>();
test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>();
test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>();
test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>();
test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>();
test_fft_non_power_of_2_round_trip<float>(7);
test_fft_non_power_of_2_round_trip<double>(7);
}

View File

@@ -0,0 +1,261 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_0d()
{
TensorFixedSize<float, Sizes<> > scalar1;
TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
VERIFY_IS_EQUAL(scalar1.rank(), 0);
VERIFY_IS_EQUAL(scalar1.size(), 1);
VERIFY_IS_EQUAL(internal::array_prod(scalar1.dimensions()), 1);
scalar1() = 7.0;
scalar2() = 13.0;
// Test against shallow copy.
TensorFixedSize<float, Sizes<> > copy = scalar1;
VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data());
VERIFY_IS_APPROX(scalar1(), copy());
copy = scalar1;
VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data());
VERIFY_IS_APPROX(scalar1(), copy());
TensorFixedSize<float, Sizes<> > scalar3 = scalar1.sqrt();
TensorFixedSize<float, Sizes<>, RowMajor> scalar4 = scalar2.sqrt();
VERIFY_IS_EQUAL(scalar3.rank(), 0);
VERIFY_IS_APPROX(scalar3(), sqrtf(7.0));
VERIFY_IS_APPROX(scalar4(), sqrtf(13.0));
scalar3 = scalar1 + scalar2;
VERIFY_IS_APPROX(scalar3(), 7.0f + 13.0f);
}
static void test_1d()
{
TensorFixedSize<float, Sizes<6> > vec1;
TensorFixedSize<float, Sizes<6>, RowMajor> vec2;
VERIFY_IS_EQUAL((vec1.size()), 6);
// VERIFY_IS_EQUAL((vec1.dimensions()[0]), 6);
// VERIFY_IS_EQUAL((vec1.dimension(0)), 6);
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;
vec1(2) = 15.0; vec2(2) = 2.0;
vec1(3) = 16.0; vec2(3) = 3.0;
vec1(4) = 23.0; vec2(4) = 4.0;
vec1(5) = 42.0; vec2(5) = 5.0;
// Test against shallow copy.
TensorFixedSize<float, Sizes<6> > copy = vec1;
VERIFY_IS_NOT_EQUAL(vec1.data(), copy.data());
for (int i = 0; i < 6; ++i) {
VERIFY_IS_APPROX(vec1(i), copy(i));
}
copy = vec1;
VERIFY_IS_NOT_EQUAL(vec1.data(), copy.data());
for (int i = 0; i < 6; ++i) {
VERIFY_IS_APPROX(vec1(i), copy(i));
}
TensorFixedSize<float, Sizes<6> > vec3 = vec1.sqrt();
TensorFixedSize<float, Sizes<6>, RowMajor> vec4 = vec2.sqrt();
VERIFY_IS_EQUAL((vec3.size()), 6);
VERIFY_IS_EQUAL(vec3.rank(), 1);
// VERIFY_IS_EQUAL((vec3.dimensions()[0]), 6);
// VERIFY_IS_EQUAL((vec3.dimension(0)), 6);
VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
VERIFY_IS_APPROX(vec3(1), sqrtf(8.0));
VERIFY_IS_APPROX(vec3(2), sqrtf(15.0));
VERIFY_IS_APPROX(vec3(3), sqrtf(16.0));
VERIFY_IS_APPROX(vec3(4), sqrtf(23.0));
VERIFY_IS_APPROX(vec3(5), sqrtf(42.0));
VERIFY_IS_APPROX(vec4(0), sqrtf(0.0));
VERIFY_IS_APPROX(vec4(1), sqrtf(1.0));
VERIFY_IS_APPROX(vec4(2), sqrtf(2.0));
VERIFY_IS_APPROX(vec4(3), sqrtf(3.0));
VERIFY_IS_APPROX(vec4(4), sqrtf(4.0));
VERIFY_IS_APPROX(vec4(5), sqrtf(5.0));
vec3 = vec1 + vec2;
VERIFY_IS_APPROX(vec3(0), 4.0f + 0.0f);
VERIFY_IS_APPROX(vec3(1), 8.0f + 1.0f);
VERIFY_IS_APPROX(vec3(2), 15.0f + 2.0f);
VERIFY_IS_APPROX(vec3(3), 16.0f + 3.0f);
VERIFY_IS_APPROX(vec3(4), 23.0f + 4.0f);
VERIFY_IS_APPROX(vec3(5), 42.0f + 5.0f);
}
static void test_tensor_map()
{
TensorFixedSize<float, Sizes<6> > vec1;
TensorFixedSize<float, Sizes<6>, RowMajor> vec2;
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;
vec1(2) = 15.0; vec2(2) = 2.0;
vec1(3) = 16.0; vec2(3) = 3.0;
vec1(4) = 23.0; vec2(4) = 4.0;
vec1(5) = 42.0; vec2(5) = 5.0;
float data3[6];
TensorMap<TensorFixedSize<float, Sizes<6> > > vec3(data3, 6);
vec3 = vec1.sqrt() + vec2;
VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
VERIFY_IS_APPROX(vec3(1), sqrtf(8.0) + 1.0f);
VERIFY_IS_APPROX(vec3(2), sqrtf(15.0) + 2.0f);
VERIFY_IS_APPROX(vec3(3), sqrtf(16.0) + 3.0f);
VERIFY_IS_APPROX(vec3(4), sqrtf(23.0) + 4.0f);
VERIFY_IS_APPROX(vec3(5), sqrtf(42.0) + 5.0f);
}
static void test_2d()
{
float data1[6];
TensorMap<TensorFixedSize<float, Sizes<2, 3> > > mat1(data1,2,3);
float data2[6];
TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor> > mat2(data2,2,3);
VERIFY_IS_EQUAL((mat1.size()), 2*3);
VERIFY_IS_EQUAL(mat1.rank(), 2);
// VERIFY_IS_EQUAL((mat1.dimension(0)), 2);
// VERIFY_IS_EQUAL((mat1.dimension(1)), 3);
mat1(0,0) = 0.0;
mat1(0,1) = 1.0;
mat1(0,2) = 2.0;
mat1(1,0) = 3.0;
mat1(1,1) = 4.0;
mat1(1,2) = 5.0;
mat2(0,0) = -0.0;
mat2(0,1) = -1.0;
mat2(0,2) = -2.0;
mat2(1,0) = -3.0;
mat2(1,1) = -4.0;
mat2(1,2) = -5.0;
TensorFixedSize<float, Sizes<2, 3> > mat3;
TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4;
mat3 = mat1.abs();
mat4 = mat2.abs();
VERIFY_IS_EQUAL((mat3.size()), 2*3);
// VERIFY_IS_EQUAL((mat3.dimension(0)), 2);
// VERIFY_IS_EQUAL((mat3.dimension(1)), 3);
VERIFY_IS_APPROX(mat3(0,0), 0.0f);
VERIFY_IS_APPROX(mat3(0,1), 1.0f);
VERIFY_IS_APPROX(mat3(0,2), 2.0f);
VERIFY_IS_APPROX(mat3(1,0), 3.0f);
VERIFY_IS_APPROX(mat3(1,1), 4.0f);
VERIFY_IS_APPROX(mat3(1,2), 5.0f);
VERIFY_IS_APPROX(mat4(0,0), 0.0f);
VERIFY_IS_APPROX(mat4(0,1), 1.0f);
VERIFY_IS_APPROX(mat4(0,2), 2.0f);
VERIFY_IS_APPROX(mat4(1,0), 3.0f);
VERIFY_IS_APPROX(mat4(1,1), 4.0f);
VERIFY_IS_APPROX(mat4(1,2), 5.0f);
}
static void test_3d()
{
TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
TensorFixedSize<float, Sizes<2, 3, 7>, RowMajor> mat2;
VERIFY_IS_EQUAL((mat1.size()), 2*3*7);
VERIFY_IS_EQUAL(mat1.rank(), 3);
// VERIFY_IS_EQUAL((mat1.dimension(0)), 2);
// VERIFY_IS_EQUAL((mat1.dimension(1)), 3);
// VERIFY_IS_EQUAL((mat1.dimension(2)), 7);
float val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
val += 1.0f;
}
}
}
TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
mat3 = mat1.sqrt();
TensorFixedSize<float, Sizes<2, 3, 7>, RowMajor> mat4;
mat4 = mat2.sqrt();
VERIFY_IS_EQUAL((mat3.size()), 2*3*7);
// VERIFY_IS_EQUAL((mat3.dimension(0)), 2);
// VERIFY_IS_EQUAL((mat3.dimension(1)), 3);
// VERIFY_IS_EQUAL((mat3.dimension(2)), 7);
val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), sqrtf(val));
VERIFY_IS_APPROX(mat4(i,j,k), sqrtf(val));
val += 1.0f;
}
}
}
}
static void test_array()
{
TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
float val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
val += 1.0f;
}
}
}
TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
mat3 = mat1.pow(3.5f);
val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f));
val += 1.0f;
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_fixed_size)
{
CALL_SUBTEST(test_0d());
CALL_SUBTEST(test_1d());
CALL_SUBTEST(test_tensor_map());
CALL_SUBTEST(test_2d());
CALL_SUBTEST(test_3d());
CALL_SUBTEST(test_array());
}

View File

@@ -0,0 +1,79 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/Core>
#include <Eigen/CXX11/Tensor>
using Eigen::MatrixXf;
using Eigen::Tensor;
static void test_simple()
{
MatrixXf m1(3,3);
MatrixXf m2(3,3);
m1.setRandom();
m2.setRandom();
TensorMap<Tensor<float, 2> > mat1(m1.data(), 3,3);
TensorMap<Tensor<float, 2> > mat2(m2.data(), 3,3);
Tensor<float, 2> mat3(3,3);
mat3 = mat1;
typedef Tensor<float, 1>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims;
dims[0] = DimPair(1, 0);
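// DimPair(1, 0) contracts dimension 1 of mat3 with dimension 0 of mat2; for
// rank-2 tensors this is exactly the matrix product m1 * m2 checked below.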
mat3 = mat3.contract(mat2, dims).eval();
VERIFY_IS_APPROX(mat3(0, 0), (m1*m2).eval()(0,0));
VERIFY_IS_APPROX(mat3(0, 1), (m1*m2).eval()(0,1));
VERIFY_IS_APPROX(mat3(0, 2), (m1*m2).eval()(0,2));
VERIFY_IS_APPROX(mat3(1, 0), (m1*m2).eval()(1,0));
VERIFY_IS_APPROX(mat3(1, 1), (m1*m2).eval()(1,1));
VERIFY_IS_APPROX(mat3(1, 2), (m1*m2).eval()(1,2));
VERIFY_IS_APPROX(mat3(2, 0), (m1*m2).eval()(2,0));
VERIFY_IS_APPROX(mat3(2, 1), (m1*m2).eval()(2,1));
VERIFY_IS_APPROX(mat3(2, 2), (m1*m2).eval()(2,2));
}
static void test_const()
{
MatrixXf input(3,3);
input.setRandom();
MatrixXf output = input;
output.rowwise() -= input.colwise().maxCoeff();
Eigen::array<int, 1> depth_dim;
depth_dim[0] = 0;
Tensor<float, 2>::Dimensions dims2d;
dims2d[0] = 1;
dims2d[1] = 3;
Eigen::array<int, 2> bcast;
bcast[0] = 3;
bcast[1] = 1;
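// maximum(depth_dim) reduces dimension 0 to the three column maxima; eval()
// materializes that reduction, and reshape + broadcast blow it back up to
// 3x3 so the subtraction mirrors the rowwise() expression above.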
const TensorMap<const Tensor<float, 2> > input_tensor(input.data(), 3, 3);
Tensor<float, 2> output_tensor = (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_APPROX(output(i, j), output_tensor(i, j));
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_forced_eval)
{
CALL_SUBTEST(test_simple());
CALL_SUBTEST(test_const());
}

View File

@@ -0,0 +1,77 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <typename DataType, int DataLayout, typename IndexType>
void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) {
IndexType sizeDim1 = 100;
IndexType sizeDim2 = 20;
IndexType sizeDim3 = 20;
Eigen::array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange);
Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange);
Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange);
DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType)));
DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
in1 = in1.random() + in1.constant(static_cast<DataType>(10.0f));
in2 = in2.random() + in2.constant(static_cast<DataType>(10.0f));
// creating TensorMap from tensor
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(DataType));
/// c=(a+b)*b
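// The explicit .eval() materializes (gpu_in1 + gpu_in2) into a device-side
// temporary before the multiply; that forced evaluation is what this test
// is exercising.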
gpu_out.device(sycl_device) = (gpu_in1 + gpu_in2).eval() * gpu_in2;
sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i, j, k),
(in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
}
}
}
printf("(a+b)*b Test Passed\n");
sycl_device.deallocate(gpu_in1_data);
sycl_device.deallocate(gpu_in2_data);
sycl_device.deallocate(gpu_out_data);
}
template <typename DataType, typename Dev_selector> void tensorForced_evalperDevice(Dev_selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_forced_eval_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_forced_eval_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_forced_eval_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(tensorForced_evalperDevice<float>(device));
CALL_SUBTEST(tensorForced_evalperDevice<half>(device));
}
}

View File

@@ -0,0 +1,91 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
struct Generator1D {
Generator1D() { }
float operator()(const array<Eigen::DenseIndex, 1>& coordinates) const {
return coordinates[0];
}
};
template <int DataLayout>
static void test_1D()
{
Tensor<float, 1> vec(6);
Tensor<float, 1> result = vec.generate(Generator1D());
for (int i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(result(i), i);
}
}
struct Generator2D {
Generator2D() { }
float operator()(const array<Eigen::DenseIndex, 2>& coordinates) const {
return 3 * coordinates[0] + 11 * coordinates[1];
}
};
template <int DataLayout>
static void test_2D()
{
Tensor<float, 2> matrix(512, 512);
Tensor<float, 2> result = matrix.generate(Generator2D());
for (int i = 0; i < 512; ++i) {
for (int j = 0; j < 512; ++j) {
VERIFY_IS_EQUAL(result(i, j), 3*i + 11*j);
}
}
}
template <int DataLayout>
static void test_gaussian()
{
int rows = 32;
int cols = 48;
array<float, 2> means;
means[0] = rows / 2.0f;
means[1] = cols / 2.0f;
array<float, 2> std_devs;
std_devs[0] = 3.14f;
std_devs[1] = 2.7f;
internal::GaussianGenerator<float, Eigen::DenseIndex, 2> gaussian_gen(means, std_devs);
Tensor<float, 2> matrix(rows, cols);
Tensor<float, 2> result = matrix.generate(gaussian_gen);
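// GaussianGenerator evaluates exp(-sum_d (x_d - mean_d)^2 / (2 * std_dev_d^2))
// at each coordinate, which the loop below reconstructs term by term.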
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
float g_rows = powf(rows/2.0f - i, 2) / (3.14f * 3.14f) * 0.5f;
float g_cols = powf(cols/2.0f - j, 2) / (2.7f * 2.7f) * 0.5f;
float gaussian = expf(-g_rows - g_cols);
VERIFY_IS_EQUAL(result(i, j), gaussian);
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_generator)
{
CALL_SUBTEST(test_1D<ColMajor>());
CALL_SUBTEST(test_1D<RowMajor>());
CALL_SUBTEST(test_2D<ColMajor>());
CALL_SUBTEST(test_2D<RowMajor>());
CALL_SUBTEST(test_gaussian<ColMajor>());
CALL_SUBTEST(test_gaussian<RowMajor>());
}

View File

@@ -0,0 +1,147 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
static const float error_threshold = 1e-8f;
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
struct Generator1D {
Generator1D() { }
float operator()(const array<Eigen::DenseIndex, 1>& coordinates) const {
return coordinates[0];
}
};
template <typename DataType, int DataLayout, typename IndexType>
static void test_1D_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 6;
array<IndexType, 1> tensorRange = {{sizeDim1}};
Tensor<DataType, 1, DataLayout,IndexType> vec(tensorRange);
Tensor<DataType, 1, DataLayout,IndexType> result(tensorRange);
const size_t tensorBuffSize =vec.size()*sizeof(DataType);
DataType* gpu_data_vec = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_result = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> gpu_vec(gpu_data_vec, tensorRange);
TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> gpu_result(gpu_data_result, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data_vec, vec.data(), tensorBuffSize);
gpu_result.device(sycl_device)=gpu_vec.generate(Generator1D());
sycl_device.memcpyDeviceToHost(result.data(), gpu_data_result, tensorBuffSize);
for (IndexType i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(result(i), i);
}
}
struct Generator2D {
Generator2D() { }
float operator()(const array<Eigen::DenseIndex, 2>& coordinates) const {
return 3 * coordinates[0] + 11 * coordinates[1];
}
};
template <typename DataType, int DataLayout, typename IndexType>
static void test_2D_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 5;
IndexType sizeDim2 = 7;
array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
Tensor<DataType, 2, DataLayout,IndexType> matrix(tensorRange);
Tensor<DataType, 2, DataLayout,IndexType> result(tensorRange);
const size_t tensorBuffSize =matrix.size()*sizeof(DataType);
DataType* gpu_data_matrix = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_result = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
TensorMap<Tensor<DataType, 2, DataLayout,IndexType>> gpu_matrix(gpu_data_matrix, tensorRange);
TensorMap<Tensor<DataType, 2, DataLayout,IndexType>> gpu_result(gpu_data_result, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data_matrix, matrix.data(), tensorBuffSize);
gpu_result.device(sycl_device)=gpu_matrix.generate(Generator2D());
sycl_device.memcpyDeviceToHost(result.data(), gpu_data_result, tensorBuffSize);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
VERIFY_IS_EQUAL(result(i, j), 3*i + 11*j);
}
}
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_gaussian_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType rows = 32;
IndexType cols = 48;
array<DataType, 2> means;
means[0] = rows / 2.0f;
means[1] = cols / 2.0f;
array<DataType, 2> std_devs;
std_devs[0] = 3.14f;
std_devs[1] = 2.7f;
internal::GaussianGenerator<DataType, Eigen::DenseIndex, 2> gaussian_gen(means, std_devs);
array<IndexType, 2> tensorRange = {{rows, cols}};
Tensor<DataType, 2, DataLayout,IndexType> matrix(tensorRange);
Tensor<DataType, 2, DataLayout,IndexType> result(tensorRange);
const size_t tensorBuffSize =matrix.size()*sizeof(DataType);
DataType* gpu_data_matrix = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_result = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
TensorMap<Tensor<DataType, 2, DataLayout,IndexType>> gpu_matrix(gpu_data_matrix, tensorRange);
TensorMap<Tensor<DataType, 2, DataLayout,IndexType>> gpu_result(gpu_data_result, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data_matrix, matrix.data(), tensorBuffSize);
gpu_result.device(sycl_device)=gpu_matrix.generate(gaussian_gen);
sycl_device.memcpyDeviceToHost(result.data(), gpu_data_result, tensorBuffSize);
for (IndexType i = 0; i < rows; ++i) {
for (IndexType j = 0; j < cols; ++j) {
DataType g_rows = powf(rows/2.0f - i, 2) / (3.14f * 3.14f) * 0.5f;
DataType g_cols = powf(cols/2.0f - j, 2) / (2.7f * 2.7f) * 0.5f;
DataType gaussian = expf(-g_rows - g_cols);
VERIFY(Eigen::internal::isApprox(result(i, j), gaussian, error_threshold));
}
}
}
template<typename DataType, typename dev_Selector> void sycl_generator_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_1D_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_1D_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_2D_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_2D_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_gaussian_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_gaussian_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_generator_sycl)
{
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_generator_test_per_device<float>(device));
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,154 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <complex>
#include <cmath>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <int DataLayout>
static void test_1D_fft_ifft_invariant(int sequence_length) {
Tensor<double, 1, DataLayout> tensor(sequence_length);
tensor.setRandom();
array<int, 1> fft;
fft[0] = 0;
Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft;
Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft_ifft;
tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), sequence_length);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), sequence_length);
for (int i = 0; i < sequence_length; ++i) {
VERIFY_IS_APPROX(static_cast<float>(tensor(i)), static_cast<float>(std::real(tensor_after_fft_ifft(i))));
}
}
template <int DataLayout>
static void test_2D_fft_ifft_invariant(int dim0, int dim1) {
Tensor<double, 2, DataLayout> tensor(dim0, dim1);
tensor.setRandom();
array<int, 2> fft;
fft[0] = 0;
fft[1] = 1;
Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft;
Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft_ifft;
tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim1; ++j) {
//std::cout << "[" << i << "][" << j << "]" << " Original data: " << tensor(i,j) << " Transformed data:" << tensor_after_fft_ifft(i,j) << std::endl;
VERIFY_IS_APPROX(static_cast<float>(tensor(i,j)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j))));
}
}
}
template <int DataLayout>
static void test_3D_fft_ifft_invariant(int dim0, int dim1, int dim2) {
Tensor<double, 3, DataLayout> tensor(dim0, dim1, dim2);
tensor.setRandom();
array<int, 3> fft;
fft[0] = 0;
fft[1] = 1;
fft[2] = 2;
Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft;
Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft_ifft;
tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2);
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim1; ++j) {
for (int k = 0; k < dim2; ++k) {
VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j,k))));
}
}
}
}
template <int DataLayout>
static void test_sub_fft_ifft_invariant(int dim0, int dim1, int dim2, int dim3) {
Tensor<double, 4, DataLayout> tensor(dim0, dim1, dim2, dim3);
tensor.setRandom();
array<int, 2> fft;
fft[0] = 2;
fft[1] = 0;
Tensor<std::complex<double>, 4, DataLayout> tensor_after_fft;
Tensor<double, 4, DataLayout> tensor_after_fft_ifft;
tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::RealPart, Eigen::FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2);
VERIFY_IS_EQUAL(tensor_after_fft.dimension(3), dim3);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2);
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(3), dim3);
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim1; ++j) {
for (int k = 0; k < dim2; ++k) {
for (int l = 0; l < dim3; ++l) {
VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k,l)), static_cast<float>(tensor_after_fft_ifft(i,j,k,l)));
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_ifft) {
CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(4));
CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(16));
CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(32));
CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(1024*1024));
CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(4,4));
CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(8,16));
CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(16,32));
CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(1024,1024));
CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(4,4,4));
CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(8,16,32));
CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(16,4,8));
CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(256,256,256));
CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(4,4,4,4));
CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(8,16,32,64));
CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(16,4,8,12));
CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(64,64,64,64));
}


@@ -0,0 +1,103 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
using Eigen::RowMajor;
template <typename DataType, int DataLayout, typename IndexType>
static void test_image_op_sycl(const Eigen::SyclDevice &sycl_device)
{
IndexType sizeDim1 = 245;
IndexType sizeDim2 = 343;
IndexType sizeDim3 = 577;
array<IndexType, 3> input_range ={{sizeDim1, sizeDim2, sizeDim3}};
array<IndexType, 3> slice_range ={{sizeDim1-1, sizeDim2, sizeDim3}};
Tensor<DataType, 3,DataLayout, IndexType> tensor1(input_range);
Tensor<DataType, 3,DataLayout, IndexType> tensor2(input_range);
Tensor<DataType, 3, DataLayout, IndexType> tensor3(slice_range);
Tensor<DataType, 3, DataLayout, IndexType> tensor3_cpu(slice_range);
typedef Eigen::DSizes<IndexType, 3> Index3;
Index3 strides1(1L,1L, 1L);
Index3 indicesStart1(1L, 0L, 0L);
Index3 indicesStop1(sizeDim1, sizeDim2, sizeDim3);
Index3 strides2(1L,1L, 1L);
Index3 indicesStart2(0L, 0L, 0L);
Index3 indicesStop2(sizeDim1-1, sizeDim2, sizeDim3);
Eigen::DSizes<IndexType, 3> sizes(sizeDim1-1,sizeDim2,sizeDim3);
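// Note: slice(start, sizes) on the device and stridedSlice(start, stop,
// stride) with unit strides on the host select the same
// (sizeDim1-1) x sizeDim2 x sizeDim3 region, so the element-wise
// differences computed on each side must match exactly.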
tensor1.setRandom();
tensor2.setRandom();
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu1(gpu_data1, input_range);
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu2(gpu_data2, input_range);
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu3(gpu_data3, slice_range);
sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_data2, tensor2.data(),(tensor2.size())*sizeof(DataType));
gpu3.device(sycl_device)= gpu1.slice(indicesStart1, sizes) - gpu2.slice(indicesStart2, sizes);
sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType));
tensor3_cpu = tensor1.stridedSlice(indicesStart1,indicesStop1,strides1) - tensor2.stridedSlice(indicesStart2,indicesStop2,strides2);
for (IndexType i = 0; i <slice_range[0] ; ++i) {
for (IndexType j = 0; j < slice_range[1]; ++j) {
for (IndexType k = 0; k < slice_range[2]; ++k) {
VERIFY_IS_EQUAL(tensor3_cpu(i,j,k), tensor3(i,j,k));
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
sycl_device.deallocate(gpu_data3);
}
template<typename DataType, typename dev_Selector> void sycl_computing_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_image_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_image_op_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
#ifdef EIGEN_SYCL_DOUBLE_SUPPORT
CALL_SUBTEST(sycl_computing_test_per_device<double>(device));
#endif
}
}


@@ -0,0 +1,809 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
void test_simple_patch()
{
Tensor<float, 4> tensor(2,3,5,7);
tensor.setRandom();
Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
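// extract_image_patches returns a tensor of rank input_rank + 1. For a
// ColMajor input laid out as (depth, rows, cols, batch) the result is
// (depth, patch_rows, patch_cols, num_patches, batch); for a RowMajor
// input the dimension order is reversed.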
// Single pixel patch: ColMajor
Tensor<float, 5> single_pixel_patch;
single_pixel_patch = tensor.extract_image_patches(1, 1);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(0), 2);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(1), 1);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(2), 1);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(3), 3*5);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(4), 7);
// Single pixel patch: RowMajor
Tensor<float, 5, RowMajor> single_pixel_patch_row_major;
single_pixel_patch_row_major = tensor_row_major.extract_image_patches(1, 1);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(0), 7);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(1), 3*5);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(2), 1);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(3), 1);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(4), 2);
for (int i = 0; i < tensor.size(); ++i) {
// ColMajor
if (tensor.data()[i] != single_pixel_patch.data()[i]) {
std::cout << "Mismatch detected at index " << i << " : "
<< tensor.data()[i] << " vs " << single_pixel_patch.data()[i]
<< std::endl;
}
VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]);
// RowMajor
if (tensor_row_major.data()[i] != single_pixel_patch_row_major.data()[i]) {
std::cout << "Mismatch detected at index " << i << " : "
<< tensor_row_major.data()[i] << " vs "
<< single_pixel_patch_row_major.data()[i] << std::endl;
}
VERIFY_IS_EQUAL(single_pixel_patch_row_major.data()[i],
tensor_row_major.data()[i]);
VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]);
VERIFY_IS_EQUAL(single_pixel_patch.data()[i],
single_pixel_patch_row_major.data()[i]);
}
// Entire image patch: ColMajor
Tensor<float, 5> entire_image_patch;
entire_image_patch = tensor.extract_image_patches(3, 5);
VERIFY_IS_EQUAL(entire_image_patch.dimension(0), 2);
VERIFY_IS_EQUAL(entire_image_patch.dimension(1), 3);
VERIFY_IS_EQUAL(entire_image_patch.dimension(2), 5);
VERIFY_IS_EQUAL(entire_image_patch.dimension(3), 3*5);
VERIFY_IS_EQUAL(entire_image_patch.dimension(4), 7);
// Entire image patch: RowMajor
Tensor<float, 5, RowMajor> entire_image_patch_row_major;
entire_image_patch_row_major = tensor_row_major.extract_image_patches(3, 5);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
int patchId = i+3*j;
for (int r = 0; r < 3; ++r) {
for (int c = 0; c < 5; ++c) {
for (int d = 0; d < 2; ++d) {
for (int b = 0; b < 7; ++b) {
float expected = 0.0f;
float expected_row_major = 0.0f;
if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
expected = tensor(d, r-1+i, c-2+j, b);
expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d);
}
// ColMajor
if (entire_image_patch(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId, b), expected);
// RowMajor
if (entire_image_patch_row_major(b, patchId, c, r, d) !=
expected_row_major) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j
<< " r=" << r << " c=" << c << " d=" << d << " b=" << b
<< std::endl;
}
VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d),
expected_row_major);
// Check that ColMajor and RowMajor agree.
VERIFY_IS_EQUAL(expected, expected_row_major);
}
}
}
}
}
}
// 2D patch: ColMajor
Tensor<float, 5> twod_patch;
twod_patch = tensor.extract_image_patches(2, 2);
VERIFY_IS_EQUAL(twod_patch.dimension(0), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(1), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5);
VERIFY_IS_EQUAL(twod_patch.dimension(4), 7);
// 2D patch: RowMajor
Tensor<float, 5, RowMajor> twod_patch_row_major;
twod_patch_row_major = tensor_row_major.extract_image_patches(2, 2);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2);
// Based on the calculation described in TensorTraits.h, padding happens to be 0.
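// With unit strides and a 2x2 kernel on a 3x5 image, the total padding per
// dimension is (out - 1) * stride + ksize - in = 1, split as 0 on the
// top/left and 1 on the bottom/right, which is why zero top/left padding is
// assumed below.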
int row_padding = 0;
int col_padding = 0;
int stride = 1;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
int patchId = i+3*j;
for (int r = 0; r < 2; ++r) {
for (int c = 0; c < 2; ++c) {
for (int d = 0; d < 2; ++d) {
for (int b = 0; b < 7; ++b) {
float expected = 0.0f;
float expected_row_major = 0.0f;
int row_offset = r*stride + i - row_padding;
int col_offset = c*stride + j - col_padding;
// ColMajor
if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) {
expected = tensor(d, row_offset, col_offset, b);
}
if (twod_patch(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId, b), expected);
// RowMajor
if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) {
expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
}
if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major);
// Check that ColMajor and RowMajor agree.
VERIFY_IS_EQUAL(expected, expected_row_major);
}
}
}
}
}
}
}
// Verifies VALID padding (no padding) with incrementing values.
void test_patch_padding_valid()
{
int input_depth = 3;
int input_rows = 3;
int input_cols = 3;
int input_batches = 1;
int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
int stride = 2; // Only same stride is supported.
Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
// Initializes tensor with incrementing numbers.
for (int i = 0; i < tensor.size(); ++i) {
tensor.data()[i] = i + 1;
}
// ColMajor
Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth
VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows
VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols
VERIFY_IS_EQUAL(result.dimension(3), 1); // number of patches
VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches
// RowMajor
Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
// No padding is carried out.
int row_padding = 0;
int col_padding = 0;
for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
int patchId = i+input_rows*j;
for (int r = 0; r < ksize; ++r) { // patch rows
for (int c = 0; c < ksize; ++c) { // patch cols
for (int d = 0; d < input_depth; ++d) { // depth
for (int b = 0; b < input_batches; ++b) { // batch
float expected = 0.0f;
float expected_row_major = 0.0f;
int row_offset = r + i - row_padding;
int col_offset = c + j - col_padding;
if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
expected = tensor(d, row_offset, col_offset, b);
expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
}
// ColMajor
if (result(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected);
// RowMajor
if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
// Check that ColMajor and RowMajor agree.
VERIFY_IS_EQUAL(expected, expected_row_major);
}
}
}
}
}
}
}
// Verifies VALID padding (no padding) with the same value.
void test_patch_padding_valid_same_value()
{
int input_depth = 1;
int input_rows = 5;
int input_cols = 5;
int input_batches = 2;
int ksize = 3; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
int stride = 2; // Only same stride is supported.
// ColMajor
Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
tensor = tensor.constant(11.0f);
Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth
VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows
VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols
VERIFY_IS_EQUAL(result.dimension(3), 4); // number of patches
VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches
// RowMajor
Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
// No padding is carried out.
int row_padding = 0;
int col_padding = 0;
for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
int patchId = i+input_rows*j;
for (int r = 0; r < ksize; ++r) { // patch rows
for (int c = 0; c < ksize; ++c) { // patch cols
for (int d = 0; d < input_depth; ++d) { // depth
for (int b = 0; b < input_batches; ++b) { // batch
float expected = 0.0f;
float expected_row_major = 0.0f;
int row_offset = r + i - row_padding;
int col_offset = c + j - col_padding;
if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
expected = tensor(d, row_offset, col_offset, b);
expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
}
// ColMajor
if (result(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected);
// RowMajor
if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
// Check that ColMajor and RowMajor agree.
VERIFY_IS_EQUAL(expected, expected_row_major);
}
}
}
}
}
}
}
// Verifies SAME padding.
void test_patch_padding_same()
{
int input_depth = 3;
int input_rows = 4;
int input_cols = 2;
int input_batches = 1;
int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
int stride = 2; // Only same stride is supported.
// ColMajor
Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
// Initializes tensor with incrementing numbers.
for (int i = 0; i < tensor.size(); ++i) {
tensor.data()[i] = i + 1;
}
Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth
VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows
VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols
VERIFY_IS_EQUAL(result.dimension(3), 2); // number of patches
VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches
// RowMajor
Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
// Based on the calculation described in TensorTraits.h, padding happens to be 0.
int row_padding = 0;
int col_padding = 0;
for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
int patchId = i+input_rows*j;
for (int r = 0; r < ksize; ++r) { // patch rows
for (int c = 0; c < ksize; ++c) { // patch cols
for (int d = 0; d < input_depth; ++d) { // depth
for (int b = 0; b < input_batches; ++b) { // batch
float expected = 0.0f;
float expected_row_major = 0.0f;
int row_offset = r*stride + i - row_padding;
int col_offset = c*stride + j - col_padding;
if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
expected = tensor(d, row_offset, col_offset, b);
expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
}
// ColMajor
if (result(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected);
// RowMajor
if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
// Check that ColMajor and RowMajor agree.
VERIFY_IS_EQUAL(expected, expected_row_major);
}
}
}
}
}
}
}
// Verifies that SAME padding, when computed as negative values, will be clipped
// to zero.
void test_patch_padding_same_negative_padding_clip_to_zero() {
int input_depth = 1;
int input_rows = 15;
int input_cols = 1;
int input_batches = 1;
int ksize = 1; // Corresponds to the Rows and Cols for
// tensor.extract_image_patches<>.
int row_stride = 5;
int col_stride = 1;
// ColMajor
Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
// Initializes tensor with incrementing numbers.
for (int i = 0; i < tensor.size(); ++i) {
tensor.data()[i] = i + 1;
}
Tensor<float, 5> result = tensor.extract_image_patches(
ksize, ksize, row_stride, col_stride, 1, 1, PADDING_SAME);
// row padding will be computed as -2 originally and then be clipped to 0.
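// With input_rows = 15, ksize = 1 and row_stride = 5, SAME padding wants
// out_rows = ceil(15 / 5) = 3, so the total padding is
// (3 - 1) * 5 + 1 - 15 = -4, i.e. -2 per side; the clip to zero makes the
// patches start at rows 0, 5 and 10, whose values are 1, 6 and 11.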
VERIFY_IS_EQUAL(result.coeff(0), 1.0f);
VERIFY_IS_EQUAL(result.coeff(1), 6.0f);
VERIFY_IS_EQUAL(result.coeff(2), 11.0f);
VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth
VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows
VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols
VERIFY_IS_EQUAL(result.dimension(3), 3); // number of patches
VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches
// RowMajor
Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
Tensor<float, 5, RowMajor> result_row_major =
tensor_row_major.extract_image_patches(ksize, ksize, row_stride,
col_stride, 1, 1, PADDING_SAME);
VERIFY_IS_EQUAL(result_row_major.coeff(0), 1.0f);
VERIFY_IS_EQUAL(result_row_major.coeff(1), 6.0f);
VERIFY_IS_EQUAL(result_row_major.coeff(2), 11.0f);
VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
}
void test_patch_no_extra_dim()
{
Tensor<float, 3> tensor(2,3,5);
tensor.setRandom();
Tensor<float, 3, RowMajor> tensor_row_major = tensor.swap_layout();
VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(0));
// Single pixel patch: ColMajor
Tensor<float, 4> single_pixel_patch;
single_pixel_patch = tensor.extract_image_patches(1, 1);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(0), 2);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(1), 1);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(2), 1);
VERIFY_IS_EQUAL(single_pixel_patch.dimension(3), 3*5);
// Single pixel patch: RowMajor
Tensor<float, 4, RowMajor> single_pixel_patch_row_major;
single_pixel_patch_row_major = tensor_row_major.extract_image_patches(1, 1);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(0), 3*5);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(1), 1);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(2), 1);
VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(3), 2);
for (int i = 0; i < tensor.size(); ++i) {
// ColMajor
if (tensor.data()[i] != single_pixel_patch.data()[i]) {
std::cout << "Mismatch detected at index " << i << " : " << tensor.data()[i] << " vs " << single_pixel_patch.data()[i] << std::endl;
}
VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]);
// RowMajor
if (tensor_row_major.data()[i] != single_pixel_patch_row_major.data()[i]) {
std::cout << "Mismatch detected at index " << i << " : "
<< tensor_row_major.data()[i] << " vs "
<< single_pixel_patch_row_major.data()[i] << std::endl;
}
VERIFY_IS_EQUAL(single_pixel_patch_row_major.data()[i],
tensor_row_major.data()[i]);
VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]);
VERIFY_IS_EQUAL(single_pixel_patch.data()[i],
single_pixel_patch_row_major.data()[i]);
}
// Entire image patch: ColMajor
Tensor<float, 4> entire_image_patch;
entire_image_patch = tensor.extract_image_patches(3, 5);
VERIFY_IS_EQUAL(entire_image_patch.dimension(0), 2);
VERIFY_IS_EQUAL(entire_image_patch.dimension(1), 3);
VERIFY_IS_EQUAL(entire_image_patch.dimension(2), 5);
VERIFY_IS_EQUAL(entire_image_patch.dimension(3), 3*5);
// Entire image patch: RowMajor
Tensor<float, 4, RowMajor> entire_image_patch_row_major;
entire_image_patch_row_major = tensor_row_major.extract_image_patches(3, 5);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3);
VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
int patchId = i+3*j;
for (int r = 0; r < 3; ++r) {
for (int c = 0; c < 5; ++c) {
for (int d = 0; d < 2; ++d) {
float expected = 0.0f;
float expected_row_major = 0.0f;
if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
expected = tensor(d, r-1+i, c-2+j);
expected_row_major = tensor_row_major(c-2+j, r-1+i, d);
}
// ColMajor
if (entire_image_patch(d, r, c, patchId) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
}
VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId), expected);
// RowMajor
if (entire_image_patch_row_major(patchId, c, r, d) !=
expected_row_major) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
}
VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d),
expected_row_major);
// Check that ColMajor and RowMajor agree.
VERIFY_IS_EQUAL(expected, expected_row_major);
}
}
}
}
}
// 2D patch: ColMajor
Tensor<float, 4> twod_patch;
twod_patch = tensor.extract_image_patches(2, 2);
VERIFY_IS_EQUAL(twod_patch.dimension(0), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(1), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5);
// 2D patch: RowMajor
Tensor<float, 4, RowMajor> twod_patch_row_major;
twod_patch_row_major = tensor_row_major.extract_image_patches(2, 2);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
// Based on the calculation described in TensorTraits.h, padding happens to be 0.
int row_padding = 0;
int col_padding = 0;
int stride = 1;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
int patchId = i+3*j;
for (int r = 0; r < 2; ++r) {
for (int c = 0; c < 2; ++c) {
for (int d = 0; d < 2; ++d) {
float expected = 0.0f;
float expected_row_major = 0.0f;
int row_offset = r*stride + i - row_padding;
int col_offset = c*stride + j - col_padding;
// ColMajor
if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) {
expected = tensor(d, row_offset, col_offset);
}
if (twod_patch(d, r, c, patchId) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
}
VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId), expected);
// RowMajor
if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) {
expected_row_major = tensor_row_major(col_offset, row_offset, d);
}
if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
}
VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major);
// Check that ColMajor and RowMajor agree.
VERIFY_IS_EQUAL(expected, expected_row_major);
}
}
}
}
}
}
void test_imagenet_patches()
{
// Test the code on typical configurations used by the 'imagenet' benchmarks at
// https://github.com/soumith/convnet-benchmarks
// ColMajor
Tensor<float, 4> l_in(3, 128, 128, 16);
l_in.setRandom();
Tensor<float, 5> l_out = l_in.extract_image_patches(11, 11);
VERIFY_IS_EQUAL(l_out.dimension(0), 3);
VERIFY_IS_EQUAL(l_out.dimension(1), 11);
VERIFY_IS_EQUAL(l_out.dimension(2), 11);
VERIFY_IS_EQUAL(l_out.dimension(3), 128*128);
VERIFY_IS_EQUAL(l_out.dimension(4), 16);
// RowMajor
Tensor<float, 5, RowMajor> l_out_row_major = l_in.swap_layout().extract_image_patches(11, 11);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 16);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 128*128);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11);
VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 3);
for (int b = 0; b < 16; ++b) {
for (int i = 0; i < 128; ++i) {
for (int j = 0; j < 128; ++j) {
int patchId = i+128*j;
for (int c = 0; c < 11; ++c) {
for (int r = 0; r < 11; ++r) {
for (int d = 0; d < 3; ++d) {
float expected = 0.0f;
if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
expected = l_in(d, r-5+i, c-5+j, b);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
if (l_out_row_major(b, patchId, c, r, d) !=
expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j
<< " r=" << r << " c=" << c << " d=" << d << " b=" << b
<< std::endl;
}
VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
expected);
}
}
}
}
}
}
// ColMajor
l_in.resize(16, 64, 64, 32);
l_in.setRandom();
l_out = l_in.extract_image_patches(9, 9);
VERIFY_IS_EQUAL(l_out.dimension(0), 16);
VERIFY_IS_EQUAL(l_out.dimension(1), 9);
VERIFY_IS_EQUAL(l_out.dimension(2), 9);
VERIFY_IS_EQUAL(l_out.dimension(3), 64*64);
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
l_out_row_major = l_in.swap_layout().extract_image_patches(9, 9);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9);
VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16);
for (int b = 0; b < 32; ++b) {
for (int i = 0; i < 64; ++i) {
for (int j = 0; j < 64; ++j) {
int patchId = i+64*j;
for (int c = 0; c < 9; ++c) {
for (int r = 0; r < 9; ++r) {
for (int d = 0; d < 16; ++d) {
float expected = 0.0f;
if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
expected = l_in(d, r-4+i, c-4+j, b);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
}
}
}
// ColMajor
l_in.resize(32, 16, 16, 32);
l_in.setRandom();
l_out = l_in.extract_image_patches(7, 7);
VERIFY_IS_EQUAL(l_out.dimension(0), 32);
VERIFY_IS_EQUAL(l_out.dimension(1), 7);
VERIFY_IS_EQUAL(l_out.dimension(2), 7);
VERIFY_IS_EQUAL(l_out.dimension(3), 16*16);
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
l_out_row_major = l_in.swap_layout().extract_image_patches(7, 7);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7);
VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32);
for (int b = 0; b < 32; ++b) {
for (int i = 0; i < 16; ++i) {
for (int j = 0; j < 16; ++j) {
int patchId = i+16*j;
for (int c = 0; c < 7; ++c) {
for (int r = 0; r < 7; ++r) {
for (int d = 0; d < 32; ++d) {
float expected = 0.0f;
if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
expected = l_in(d, r-3+i, c-3+j, b);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
}
}
}
// ColMajor
l_in.resize(64, 13, 13, 32);
l_in.setRandom();
l_out = l_in.extract_image_patches(3, 3);
VERIFY_IS_EQUAL(l_out.dimension(0), 64);
VERIFY_IS_EQUAL(l_out.dimension(1), 3);
VERIFY_IS_EQUAL(l_out.dimension(2), 3);
VERIFY_IS_EQUAL(l_out.dimension(3), 13*13);
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
l_out_row_major = l_in.swap_layout().extract_image_patches(3, 3);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3);
VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64);
for (int b = 0; b < 32; ++b) {
for (int i = 0; i < 13; ++i) {
for (int j = 0; j < 13; ++j) {
int patchId = i+13*j;
for (int c = 0; c < 3; ++c) {
for (int r = 0; r < 3; ++r) {
for (int d = 0; d < 64; ++d) {
float expected = 0.0f;
if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
expected = l_in(d, r-1+i, c-1+j, b);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_image_patch)
{
CALL_SUBTEST_1(test_simple_patch());
CALL_SUBTEST_2(test_patch_no_extra_dim());
CALL_SUBTEST_3(test_patch_padding_valid());
CALL_SUBTEST_4(test_patch_padding_valid_same_value());
CALL_SUBTEST_5(test_patch_padding_same());
CALL_SUBTEST_6(test_imagenet_patches());
CALL_SUBTEST_7(test_patch_padding_same_negative_padding_clip_to_zero());
}

File diff suppressed because it is too large


@@ -0,0 +1,385 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
#ifdef EIGEN_HAS_INDEX_LIST
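// IndexList packs a mix of compile-time indices (type2index<N>) and
// run-time indices into one object, so expressions such as sum() can
// specialize on the statically known entries while still accepting
// dynamic ones.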
static void test_static_index_list()
{
Tensor<float, 4> tensor(2,3,5,7);
tensor.setRandom();
constexpr auto reduction_axis = make_index_list(0, 1, 2);
VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 0);
VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1);
VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 2);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[0]), 0);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[1]), 1);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[2]), 2);
EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_axis) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<1>(reduction_axis) == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_axis) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
Tensor<float, 1> result = tensor.sum(reduction_axis);
for (int i = 0; i < result.size(); ++i) {
float expected = 0.0f;
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 5; ++l) {
expected += tensor(j,k,l,i);
}
}
}
VERIFY_IS_APPROX(result(i), expected);
}
}
static void test_type2index_list()
{
Tensor<float, 5> tensor(2,3,5,7,11);
tensor.setRandom();
tensor += tensor.constant(10.0f);
typedef Eigen::IndexList<Eigen::type2index<0>> Dims0;
typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>> Dims1;
typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>> Dims2;
typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>> Dims3;
typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> Dims4;
#if 0
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims0>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims1>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims2>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims3>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims4>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
#endif
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims0, 1, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims1, 2, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims2, 3, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims3, 4, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims4, 5, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims0, 1, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims1, 2, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims2, 3, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims3, 4, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims4, 5, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
const Dims0 reduction_axis0;
Tensor<float, 4> result0 = tensor.sum(reduction_axis0);
for (int m = 0; m < 11; ++m) {
for (int l = 0; l < 7; ++l) {
for (int k = 0; k < 5; ++k) {
for (int j = 0; j < 3; ++j) {
float expected = 0.0f;
for (int i = 0; i < 2; ++i) {
expected += tensor(i,j,k,l,m);
}
VERIFY_IS_APPROX(result0(j,k,l,m), expected);
}
}
}
}
const Dims1 reduction_axis1;
Tensor<float, 3> result1 = tensor.sum(reduction_axis1);
for (int m = 0; m < 11; ++m) {
for (int l = 0; l < 7; ++l) {
for (int k = 0; k < 5; ++k) {
float expected = 0.0f;
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 2; ++i) {
expected += tensor(i,j,k,l,m);
}
}
VERIFY_IS_APPROX(result1(k,l,m), expected);
}
}
}
const Dims2 reduction_axis2;
Tensor<float, 2> result2 = tensor.sum(reduction_axis2);
for (int m = 0; m < 11; ++m) {
for (int l = 0; l < 7; ++l) {
float expected = 0.0f;
for (int k = 0; k < 5; ++k) {
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 2; ++i) {
expected += tensor(i,j,k,l,m);
}
}
}
VERIFY_IS_APPROX(result2(l,m), expected);
}
}
const Dims3 reduction_axis3;
Tensor<float, 1> result3 = tensor.sum(reduction_axis3);
for (int m = 0; m < 11; ++m) {
float expected = 0.0f;
for (int l = 0; l < 7; ++l) {
for (int k = 0; k < 5; ++k) {
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 2; ++i) {
expected += tensor(i,j,k,l,m);
}
}
}
}
VERIFY_IS_APPROX(result3(m), expected);
}
const Dims4 reduction_axis4;
Tensor<float, 0> result4 = tensor.sum(reduction_axis4);
float expected = 0.0f;
for (int m = 0; m < 11; ++m) {
for (int l = 0; l < 7; ++l) {
for (int k = 0; k < 5; ++k) {
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 2; ++i) {
expected += tensor(i,j,k,l,m);
}
}
}
}
}
VERIFY_IS_APPROX(result4(), expected);
}
static void test_type2indexpair_list()
{
Tensor<float, 5> tensor(2,3,5,7,11);
tensor.setRandom();
tensor += tensor.constant(10.0f);
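// IndexPairList is the pair-valued analogue of IndexList: each entry is an
// Eigen::IndexPair whose two values are either fixed at compile time
// (type2indexpair<First, Second>) or supplied at run time.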
typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>> Dims0;
typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::type2indexpair<1,11>, Eigen::type2indexpair<2,12>> Dims2_a;
typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<Index>, Eigen::type2indexpair<2,12>> Dims2_b;
typedef Eigen::IndexPairList<Eigen::IndexPair<Index>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<Index>> Dims2_c;
Dims2_a d2_a;
Dims2_b d2_b;
d2_b.set(1, Eigen::IndexPair<Index>(1,11));
Dims2_c d2_c;
d2_c.set(0, Eigen::IndexPair<Index>(0,10));
d2_c.set(1, Eigen::IndexPair<Index>(1,11)); // assigning a type2indexpair entry the value it already encodes is allowed.
d2_c.set(2, Eigen::IndexPair<Index>(2,12));
VERIFY_IS_EQUAL(d2_a[0].first, 0);
VERIFY_IS_EQUAL(d2_a[0].second, 10);
VERIFY_IS_EQUAL(d2_a[1].first, 1);
VERIFY_IS_EQUAL(d2_a[1].second, 11);
VERIFY_IS_EQUAL(d2_a[2].first, 2);
VERIFY_IS_EQUAL(d2_a[2].second, 12);
VERIFY_IS_EQUAL(d2_b[0].first, 0);
VERIFY_IS_EQUAL(d2_b[0].second, 10);
VERIFY_IS_EQUAL(d2_b[1].first, 1);
VERIFY_IS_EQUAL(d2_b[1].second, 11);
VERIFY_IS_EQUAL(d2_b[2].first, 2);
VERIFY_IS_EQUAL(d2_b[2].second, 12);
VERIFY_IS_EQUAL(d2_c[0].first, 0);
VERIFY_IS_EQUAL(d2_c[0].second, 10);
VERIFY_IS_EQUAL(d2_c[1].first, 1);
VERIFY_IS_EQUAL(d2_c[1].second, 11);
VERIFY_IS_EQUAL(d2_c[2].first, 2);
VERIFY_IS_EQUAL(d2_c[2].second, 12);
EIGEN_STATIC_ASSERT((d2_a.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_a.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_a.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_b.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_b.value_known_statically(1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_b.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_c.value_known_statically(0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_c.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((d2_c.value_known_statically(2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 10) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
}
static void test_dynamic_index_list()
{
Tensor<float, 4> tensor(2,3,5,7);
tensor.setRandom();
int dim1 = 2;
int dim2 = 1;
int dim3 = 0;
auto reduction_axis = make_index_list(dim1, dim2, dim3);
VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 2);
VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1);
VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 0);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[0]), 2);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[1]), 1);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[2]), 0);
Tensor<float, 1> result = tensor.sum(reduction_axis);
for (int i = 0; i < result.size(); ++i) {
float expected = 0.0f;
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 5; ++l) {
expected += tensor(j,k,l,i);
}
}
}
VERIFY_IS_APPROX(result(i), expected);
}
}
static void test_mixed_index_list()
{
Tensor<float, 4> tensor(2,3,5,7);
tensor.setRandom();
int dim2 = 1;
int dim4 = 3;
auto reduction_axis = make_index_list(0, dim2, 2, dim4);
VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 0);
VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1);
VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 2);
VERIFY_IS_EQUAL(internal::array_get<3>(reduction_axis), 3);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[0]), 0);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[1]), 1);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[2]), 2);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[3]), 3);
typedef IndexList<type2index<0>, int, type2index<2>, int> ReductionIndices;
ReductionIndices reduction_indices;
reduction_indices.set(1, 1);
reduction_indices.set(3, 3);
EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_indices) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_indices) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
#if 0
EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
#endif
typedef IndexList<type2index<0>, type2index<1>, type2index<2>, type2index<3>> ReductionList;
ReductionList reduction_list;
EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(3, 3) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
#if 0
EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
#endif
Tensor<float, 0> result1 = tensor.sum(reduction_axis);
Tensor<float, 0> result2 = tensor.sum(reduction_indices);
Tensor<float, 0> result3 = tensor.sum(reduction_list);
float expected = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
expected += tensor(i,j,k,l);
}
}
}
}
VERIFY_IS_APPROX(result1(), expected);
VERIFY_IS_APPROX(result2(), expected);
VERIFY_IS_APPROX(result3(), expected);
}
static void test_dim_check()
{
Eigen::IndexList<Eigen::type2index<1>, int> dim1;
dim1.set(1, 2);
Eigen::IndexList<Eigen::type2index<1>, int> dim2;
dim2.set(1, 2);
VERIFY(dimensions_match(dim1, dim2));
}
#endif
EIGEN_DECLARE_TEST(cxx11_tensor_index_list)
{
#ifdef EIGEN_HAS_INDEX_LIST
CALL_SUBTEST(test_static_index_list());
CALL_SUBTEST(test_type2index_list());
CALL_SUBTEST(test_type2indexpair_list());
CALL_SUBTEST(test_dynamic_index_list());
CALL_SUBTEST(test_mixed_index_list());
CALL_SUBTEST(test_dim_check());
#endif
}


@@ -0,0 +1,81 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Ke Yang <yangke@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
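// inflate() is the inverse of striding: it spreads the input out along each
// dimension, inserting (stride - 1) zeros between consecutive coefficients.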
template<int DataLayout>
static void test_simple_inflation()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> strides;
strides[0] = 1;
strides[1] = 1;
strides[2] = 1;
strides[3] = 1;
Tensor<float, 4, DataLayout> no_stride;
no_stride = tensor.inflate(strides);
VERIFY_IS_EQUAL(no_stride.dimension(0), 2);
VERIFY_IS_EQUAL(no_stride.dimension(1), 3);
VERIFY_IS_EQUAL(no_stride.dimension(2), 5);
VERIFY_IS_EQUAL(no_stride.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
}
}
}
}
strides[0] = 2;
strides[1] = 4;
strides[2] = 2;
strides[3] = 3;
Tensor<float, 4, DataLayout> inflated;
inflated = tensor.inflate(strides);
VERIFY_IS_EQUAL(inflated.dimension(0), 3);
VERIFY_IS_EQUAL(inflated.dimension(1), 9);
VERIFY_IS_EQUAL(inflated.dimension(2), 9);
VERIFY_IS_EQUAL(inflated.dimension(3), 19);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 9; ++k) {
for (int l = 0; l < 19; ++l) {
if (i % 2 == 0 &&
j % 4 == 0 &&
k % 2 == 0 &&
l % 3 == 0) {
VERIFY_IS_EQUAL(inflated(i,j,k,l),
tensor(i/2, j/4, k/2, l/3));
} else {
VERIFY_IS_EQUAL(0, inflated(i,j,k,l));
}
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_inflation)
{
CALL_SUBTEST(test_simple_inflation<ColMajor>());
CALL_SUBTEST(test_simple_inflation<RowMajor>());
}

View File

@@ -0,0 +1,136 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
// Inflation definition: for each dimension, the inflated size is
// (dim - 1) * stride[dim] + 1.
// For example, a 1-d vector of size 3 with the values (4, 4, 4) and an
// inflation stride of 3 becomes a tensor of size (3 - 1) * 3 + 1 = 7 with
// the values (4, 0, 0, 4, 0, 0, 4).
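// Equivalently, per dimension (a reference sketch, not Eigen's code):
// out(o) = (o % stride == 0) ? in(o / stride) : DataType(0);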
template <typename DataType, int DataLayout, typename IndexType>
void test_simple_inflation_sycl(const Eigen::SyclDevice &sycl_device) {
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange);
Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensorRange);
tensor.setRandom();
array<IndexType, 4> strides;
strides[0] = 1;
strides[1] = 1;
strides[2] = 1;
strides[3] = 1;
const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_no_stride = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_no_stride(gpu_data_no_stride, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
gpu_no_stride.device(sycl_device)=gpu_tensor.inflate(strides);
sycl_device.memcpyDeviceToHost(no_stride.data(), gpu_data_no_stride, tensorBuffSize);
VERIFY_IS_EQUAL(no_stride.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(no_stride.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(no_stride.dimension(2), sizeDim3);
VERIFY_IS_EQUAL(no_stride.dimension(3), sizeDim4);
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
for (IndexType k = 0; k < 5; ++k) {
for (IndexType l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
}
}
}
}
strides[0] = 2;
strides[1] = 4;
strides[2] = 2;
strides[3] = 3;
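// The inflated sizes below follow (d - 1) * stride + 1 per dimension:
// (2-1)*2+1 = 3, (3-1)*4+1 = 9, (5-1)*2+1 = 9, (7-1)*3+1 = 19.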
IndexType inflatedSizeDim1 = 3;
IndexType inflatedSizeDim2 = 9;
IndexType inflatedSizeDim3 = 9;
IndexType inflatedSizeDim4 = 19;
array<IndexType, 4> inflatedTensorRange = {{inflatedSizeDim1, inflatedSizeDim2, inflatedSizeDim3, inflatedSizeDim4}};
Tensor<DataType, 4, DataLayout, IndexType> inflated(inflatedTensorRange);
const size_t inflatedTensorBuffSize =inflated.size()*sizeof(DataType);
DataType* gpu_data_inflated = static_cast<DataType*>(sycl_device.allocate(inflatedTensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_inflated(gpu_data_inflated, inflatedTensorRange);
gpu_inflated.device(sycl_device)=gpu_tensor.inflate(strides);
sycl_device.memcpyDeviceToHost(inflated.data(), gpu_data_inflated, inflatedTensorBuffSize);
VERIFY_IS_EQUAL(inflated.dimension(0), inflatedSizeDim1);
VERIFY_IS_EQUAL(inflated.dimension(1), inflatedSizeDim2);
VERIFY_IS_EQUAL(inflated.dimension(2), inflatedSizeDim3);
VERIFY_IS_EQUAL(inflated.dimension(3), inflatedSizeDim4);
for (IndexType i = 0; i < inflatedSizeDim1; ++i) {
for (IndexType j = 0; j < inflatedSizeDim2; ++j) {
for (IndexType k = 0; k < inflatedSizeDim3; ++k) {
for (IndexType l = 0; l < inflatedSizeDim4; ++l) {
if (i % strides[0] == 0 &&
j % strides[1] == 0 &&
k % strides[2] == 0 &&
l % strides[3] == 0) {
VERIFY_IS_EQUAL(inflated(i,j,k,l),
tensor(i/strides[0], j/strides[1], k/strides[2], l/strides[3]));
} else {
VERIFY_IS_EQUAL(0, inflated(i,j,k,l));
}
}
}
}
}
sycl_device.deallocate(gpu_data_tensor);
sycl_device.deallocate(gpu_data_no_stride);
sycl_device.deallocate(gpu_data_inflated);
}
template<typename DataType, typename dev_Selector> void sycl_inflation_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_inflation_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_simple_inflation_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_inflation_sycl)
{
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_inflation_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,147 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
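// TensorIntDivisor replaces division by a runtime-constant divisor with a
// multiply and a shift. A minimal sketch of the idea (an illustrative
// assumption, not necessarily Eigen's exact algorithm): precompute
// M = ceil(2^32 / d); then (n * M) >> 32 equals n / d whenever
// n * (M * d - 2^32) < 2^32, which holds in particular whenever n * d < 2^32,
// and therefore for every (n, d) pair exercised below.
static uint32_t magic_divide_sketch(uint32_t n, uint32_t d) {
  const uint64_t M = ((uint64_t(1) << 32) + d - 1) / d;  // ceil(2^32 / d), d >= 1
  return static_cast<uint32_t>((n * M) >> 32);
}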
void test_signed_32bit()
{
// Divide by one
const Eigen::internal::TensorIntDivisor<int32_t, false> div_by_one(1);
for (int32_t j = 0; j < 25000; ++j) {
const int32_t fast_div = j / div_by_one;
const int32_t slow_div = j / 1;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
// Standard divide by 2 or more
for (int32_t i = 2; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<int32_t, false> div(i);
for (int32_t j = 0; j < 25000; ++j) {
const int32_t fast_div = j / div;
const int32_t slow_div = j / i;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
}
// Optimized divide by 2 or more
for (int32_t i = 2; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<int32_t, true> div(i);
for (int32_t j = 0; j < 25000; ++j) {
const int32_t fast_div = j / div;
const int32_t slow_div = j / i;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
}
}
void test_unsigned_32bit()
{
for (uint32_t i = 1; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<uint32_t> div(i);
for (uint32_t j = 0; j < 25000; ++j) {
const uint32_t fast_div = j / div;
const uint32_t slow_div = j / i;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
}
}
void test_signed_64bit()
{
for (int64_t i = 1; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<int64_t> div(i);
for (int64_t j = 0; j < 25000; ++j) {
const int64_t fast_div = j / div;
const int64_t slow_div = j / i;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
}
}
void test_unsigned_64bit()
{
for (uint64_t i = 1; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
for (uint64_t j = 0; j < 25000; ++j) {
const uint64_t fast_div = j / div;
const uint64_t slow_div = j / i;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
}
}
void test_powers_32bit() {
for (int expon = 1; expon < 31; expon++) {
int32_t div = (1 << expon);
for (int num_expon = 0; num_expon < 32; num_expon++) {
// Compute the bounds in 64-bit arithmetic: (1 << 31) would overflow a signed 32-bit int.
int32_t start_num = static_cast<int32_t>((int64_t(1) << num_expon) - 100);
int32_t end_num = static_cast<int32_t>((int64_t(1) << num_expon) + 100);
if (start_num < 0)
start_num = 0;
for (int32_t num = start_num; num < end_num; num++) {
Eigen::internal::TensorIntDivisor<int32_t> divider =
Eigen::internal::TensorIntDivisor<int32_t>(div);
int32_t result = num/div;
int32_t result_op = divider.divide(num);
VERIFY_IS_EQUAL(result_op, result);
}
}
}
}
void test_powers_64bit() {
for (int expon = 0; expon < 63; expon++) {
int64_t div = (1ull << expon);
for (int num_expon = 0; num_expon < 63; num_expon++) {
int64_t start_num = (1ull << num_expon) - 10;
int64_t end_num = (1ull << num_expon) + 10;
if (start_num < 0)
start_num = 0;
for (int64_t num = start_num; num < end_num; num++) {
Eigen::internal::TensorIntDivisor<int64_t> divider(div);
int64_t result = num/div;
int64_t result_op = divider.divide(num);
VERIFY_IS_EQUAL(result_op, result);
}
}
}
}
void test_specific() {
// A particular combination that was previously failing
int64_t div = 209715200;
int64_t num = 3238002688ll;
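// Expected quotient: 3238002688 / 209715200 == 15.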
Eigen::internal::TensorIntDivisor<int64_t> divider(div);
int64_t result = num/div;
int64_t result_op = divider.divide(num);
VERIFY_IS_EQUAL(result, result_op);
}
EIGEN_DECLARE_TEST(cxx11_tensor_intdiv)
{
CALL_SUBTEST_1(test_signed_32bit());
CALL_SUBTEST_2(test_unsigned_32bit());
CALL_SUBTEST_3(test_signed_64bit());
CALL_SUBTEST_4(test_unsigned_64bit());
CALL_SUBTEST_5(test_powers_32bit());
CALL_SUBTEST_6(test_powers_64bit());
CALL_SUBTEST_7(test_specific());
}

View File

@@ -0,0 +1,136 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <sstream>
#include <string>
#include <Eigen/CXX11/Tensor>
template<int DataLayout>
static void test_output_0d()
{
Tensor<int, 0, DataLayout> tensor;
tensor() = 123;
std::stringstream os;
os << tensor;
std::string expected("123");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_1d()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
}
std::stringstream os;
os << tensor;
std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
Eigen::Tensor<double,1,DataLayout> empty_tensor(0);
std::stringstream empty_os;
empty_os << empty_tensor;
std::string empty_string;
VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string);
}
template<int DataLayout>
static void test_output_2d()
{
Tensor<int, 2, DataLayout> tensor(5, 3);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 3; ++j) {
tensor(i, j) = i*j;
}
}
std::stringstream os;
os << tensor;
std::string expected("0 0 0\n0 1 2\n0 2 4\n0 3 6\n0 4 8");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_expr()
{
Tensor<int, 1, DataLayout> tensor1(5);
Tensor<int, 1, DataLayout> tensor2(5);
for (int i = 0; i < 5; ++i) {
tensor1(i) = i;
tensor2(i) = 7;
}
std::stringstream os;
os << tensor1 + tensor2;
std::string expected(" 7\n 8\n 9\n10\n11");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_string()
{
Tensor<std::string, 2, DataLayout> tensor(5, 3);
tensor.setConstant(std::string("foo"));
std::stringstream os;
os << tensor;
std::string expected("foo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_const()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
}
TensorMap<Tensor<const int, 1, DataLayout> > tensor_map(tensor.data(), 5);
std::stringstream os;
os << tensor_map;
std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
EIGEN_DECLARE_TEST(cxx11_tensor_io)
{
CALL_SUBTEST(test_output_0d<ColMajor>());
CALL_SUBTEST(test_output_0d<RowMajor>());
CALL_SUBTEST(test_output_1d<ColMajor>());
CALL_SUBTEST(test_output_1d<RowMajor>());
CALL_SUBTEST(test_output_2d<ColMajor>());
CALL_SUBTEST(test_output_2d<RowMajor>());
CALL_SUBTEST(test_output_expr<ColMajor>());
CALL_SUBTEST(test_output_expr<RowMajor>());
CALL_SUBTEST(test_output_string<ColMajor>());
CALL_SUBTEST(test_output_string<RowMajor>());
CALL_SUBTEST(test_output_const<ColMajor>());
CALL_SUBTEST(test_output_const<RowMajor>());
}

View File

@@ -0,0 +1,61 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
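// swap_layout() reinterprets the same buffer with the opposite storage
// order, which reverses the logical dimension order: element (i,j,k) of the
// ColMajor tensor is seen at (k,j,i) through the RowMajor view.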
static void test_simple_swap()
{
Tensor<float, 3, ColMajor> tensor(2,3,7);
tensor.setRandom();
Tensor<float, 3, RowMajor> tensor2 = tensor.swap_layout();
VERIFY_IS_EQUAL(tensor.dimension(0), tensor2.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor2.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor2.dimension(0));
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(tensor(i,j,k), tensor2(k,j,i));
}
}
}
}
static void test_swap_as_lvalue()
{
Tensor<float, 3, ColMajor> tensor(2,3,7);
tensor.setRandom();
Tensor<float, 3, RowMajor> tensor2(7,3,2);
tensor2.swap_layout() = tensor;
VERIFY_IS_EQUAL(tensor.dimension(0), tensor2.dimension(2));
VERIFY_IS_EQUAL(tensor.dimension(1), tensor2.dimension(1));
VERIFY_IS_EQUAL(tensor.dimension(2), tensor2.dimension(0));
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(tensor(i,j,k), tensor2(k,j,i));
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_layout_swap)
{
CALL_SUBTEST(test_simple_swap());
CALL_SUBTEST(test_swap_as_lvalue());
}

View File

@@ -0,0 +1,126 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <typename DataType, typename IndexType>
static void test_simple_swap_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 7;
array<IndexType, 3> tensorColRange = {{sizeDim1, sizeDim2, sizeDim3}};
array<IndexType, 3> tensorRowRange = {{sizeDim3, sizeDim2, sizeDim1}};
Tensor<DataType, 3, ColMajor, IndexType> tensor1(tensorColRange);
Tensor<DataType, 3, RowMajor, IndexType> tensor2(tensorRowRange);
tensor1.setRandom();
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu1(gpu_data1, tensorColRange);
TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu2(gpu_data2, tensorRowRange);
sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
gpu2.device(sycl_device)=gpu1.swap_layout();
sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
VERIFY_IS_EQUAL(tensor1.dimension(0), tensor2.dimension(2));
VERIFY_IS_EQUAL(tensor1.dimension(1), tensor2.dimension(1));
VERIFY_IS_EQUAL(tensor1.dimension(2), tensor2.dimension(0));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
for (IndexType k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(tensor1(i,j,k), tensor2(k,j,i));
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
}
template <typename DataType, typename IndexType>
static void test_swap_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 7;
array<IndexType, 3> tensorColRange = {{sizeDim1, sizeDim2, sizeDim3}};
array<IndexType, 3> tensorRowRange = {{sizeDim3, sizeDim2, sizeDim1}};
Tensor<DataType, 3, ColMajor, IndexType> tensor1(tensorColRange);
Tensor<DataType, 3, RowMajor, IndexType> tensor2(tensorRowRange);
tensor1.setRandom();
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu1(gpu_data1, tensorColRange);
TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu2(gpu_data2, tensorRowRange);
sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
gpu2.swap_layout().device(sycl_device)=gpu1;
sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
VERIFY_IS_EQUAL(tensor1.dimension(0), tensor2.dimension(2));
VERIFY_IS_EQUAL(tensor1.dimension(1), tensor2.dimension(1));
VERIFY_IS_EQUAL(tensor1.dimension(2), tensor2.dimension(0));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
for (IndexType k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(tensor1(i,j,k), tensor2(k,j,i));
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
}
template<typename DataType, typename dev_Selector> void sycl_tensor_layout_swap_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_swap_sycl<DataType, int64_t>(sycl_device);
test_swap_as_lvalue_sycl<DataType, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_layout_swap_sycl)
{
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_tensor_layout_swap_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,42 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_compound_assignment()
{
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> mat3(2,3,7);
mat1.setRandom();
mat2.setRandom();
mat3 = mat1;
mat3 += mat2;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), mat1(i,j,k) + mat2(i,j,k));
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_lvalue)
{
CALL_SUBTEST(test_compound_assignment());
}

View File

@@ -0,0 +1,327 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_0d()
{
Tensor<int, 0> scalar1;
Tensor<int, 0, RowMajor> scalar2;
TensorMap<const Tensor<int, 0> > scalar3(scalar1.data());
TensorMap<const Tensor<int, 0, RowMajor> > scalar4(scalar2.data());
scalar1() = 7;
scalar2() = 13;
VERIFY_IS_EQUAL(scalar1.rank(), 0);
VERIFY_IS_EQUAL(scalar1.size(), 1);
VERIFY_IS_EQUAL(scalar3(), 7);
VERIFY_IS_EQUAL(scalar4(), 13);
}
static void test_1d()
{
Tensor<int, 1> vec1(6);
Tensor<int, 1, RowMajor> vec2(6);
TensorMap<const Tensor<int, 1> > vec3(vec1.data(), 6);
TensorMap<const Tensor<int, 1, RowMajor> > vec4(vec2.data(), 6);
vec1(0) = 4; vec2(0) = 0;
vec1(1) = 8; vec2(1) = 1;
vec1(2) = 15; vec2(2) = 2;
vec1(3) = 16; vec2(3) = 3;
vec1(4) = 23; vec2(4) = 4;
vec1(5) = 42; vec2(5) = 5;
VERIFY_IS_EQUAL(vec1.rank(), 1);
VERIFY_IS_EQUAL(vec1.size(), 6);
VERIFY_IS_EQUAL(vec1.dimension(0), 6);
VERIFY_IS_EQUAL(vec3(0), 4);
VERIFY_IS_EQUAL(vec3(1), 8);
VERIFY_IS_EQUAL(vec3(2), 15);
VERIFY_IS_EQUAL(vec3(3), 16);
VERIFY_IS_EQUAL(vec3(4), 23);
VERIFY_IS_EQUAL(vec3(5), 42);
VERIFY_IS_EQUAL(vec4(0), 0);
VERIFY_IS_EQUAL(vec4(1), 1);
VERIFY_IS_EQUAL(vec4(2), 2);
VERIFY_IS_EQUAL(vec4(3), 3);
VERIFY_IS_EQUAL(vec4(4), 4);
VERIFY_IS_EQUAL(vec4(5), 5);
}
static void test_2d()
{
Tensor<int, 2> mat1(2,3);
Tensor<int, 2, RowMajor> mat2(2,3);
mat1(0,0) = 0;
mat1(0,1) = 1;
mat1(0,2) = 2;
mat1(1,0) = 3;
mat1(1,1) = 4;
mat1(1,2) = 5;
mat2(0,0) = 0;
mat2(0,1) = 1;
mat2(0,2) = 2;
mat2(1,0) = 3;
mat2(1,1) = 4;
mat2(1,2) = 5;
TensorMap<const Tensor<int, 2> > mat3(mat1.data(), 2, 3);
TensorMap<const Tensor<int, 2, RowMajor> > mat4(mat2.data(), 2, 3);
VERIFY_IS_EQUAL(mat3.rank(), 2);
VERIFY_IS_EQUAL(mat3.size(), 6);
VERIFY_IS_EQUAL(mat3.dimension(0), 2);
VERIFY_IS_EQUAL(mat3.dimension(1), 3);
VERIFY_IS_EQUAL(mat4.rank(), 2);
VERIFY_IS_EQUAL(mat4.size(), 6);
VERIFY_IS_EQUAL(mat4.dimension(0), 2);
VERIFY_IS_EQUAL(mat4.dimension(1), 3);
VERIFY_IS_EQUAL(mat3(0,0), 0);
VERIFY_IS_EQUAL(mat3(0,1), 1);
VERIFY_IS_EQUAL(mat3(0,2), 2);
VERIFY_IS_EQUAL(mat3(1,0), 3);
VERIFY_IS_EQUAL(mat3(1,1), 4);
VERIFY_IS_EQUAL(mat3(1,2), 5);
VERIFY_IS_EQUAL(mat4(0,0), 0);
VERIFY_IS_EQUAL(mat4(0,1), 1);
VERIFY_IS_EQUAL(mat4(0,2), 2);
VERIFY_IS_EQUAL(mat4(1,0), 3);
VERIFY_IS_EQUAL(mat4(1,1), 4);
VERIFY_IS_EQUAL(mat4(1,2), 5);
}
static void test_3d()
{
Tensor<int, 3> mat1(2,3,7);
Tensor<int, 3, RowMajor> mat2(2,3,7);
int val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
val++;
}
}
}
TensorMap<const Tensor<int, 3> > mat3(mat1.data(), 2, 3, 7);
TensorMap<const Tensor<int, 3, RowMajor> > mat4(mat2.data(), 2, 3, 7);
VERIFY_IS_EQUAL(mat3.rank(), 3);
VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
VERIFY_IS_EQUAL(mat3.dimension(0), 2);
VERIFY_IS_EQUAL(mat3.dimension(1), 3);
VERIFY_IS_EQUAL(mat3.dimension(2), 7);
VERIFY_IS_EQUAL(mat4.rank(), 3);
VERIFY_IS_EQUAL(mat4.size(), 2*3*7);
VERIFY_IS_EQUAL(mat4.dimension(0), 2);
VERIFY_IS_EQUAL(mat4.dimension(1), 3);
VERIFY_IS_EQUAL(mat4.dimension(2), 7);
val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(mat3(i,j,k), val);
VERIFY_IS_EQUAL(mat4(i,j,k), val);
val++;
}
}
}
}
static void test_from_tensor()
{
Tensor<int, 3> mat1(2,3,7);
Tensor<int, 3, RowMajor> mat2(2,3,7);
int val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
val++;
}
}
}
TensorMap<Tensor<int, 3> > mat3(mat1);
TensorMap<Tensor<int, 3, RowMajor> > mat4(mat2);
VERIFY_IS_EQUAL(mat3.rank(), 3);
VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
VERIFY_IS_EQUAL(mat3.dimension(0), 2);
VERIFY_IS_EQUAL(mat3.dimension(1), 3);
VERIFY_IS_EQUAL(mat3.dimension(2), 7);
VERIFY_IS_EQUAL(mat4.rank(), 3);
VERIFY_IS_EQUAL(mat4.size(), 2*3*7);
VERIFY_IS_EQUAL(mat4.dimension(0), 2);
VERIFY_IS_EQUAL(mat4.dimension(1), 3);
VERIFY_IS_EQUAL(mat4.dimension(2), 7);
val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(mat3(i,j,k), val);
VERIFY_IS_EQUAL(mat4(i,j,k), val);
val++;
}
}
}
TensorFixedSize<int, Sizes<2,3,7> > mat5;
val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
array<ptrdiff_t, 3> coords;
coords[0] = i;
coords[1] = j;
coords[2] = k;
mat5(coords) = val;
val++;
}
}
}
TensorMap<TensorFixedSize<int, Sizes<2,3,7> > > mat6(mat5);
VERIFY_IS_EQUAL(mat6.rank(), 3);
VERIFY_IS_EQUAL(mat6.size(), 2*3*7);
VERIFY_IS_EQUAL(mat6.dimension(0), 2);
VERIFY_IS_EQUAL(mat6.dimension(1), 3);
VERIFY_IS_EQUAL(mat6.dimension(2), 7);
val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(mat6(i,j,k), val);
val++;
}
}
}
}
static int f(const TensorMap<Tensor<int, 3> >& tensor) {
EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
Tensor<int, 0> result = tensor.sum();
return result();
}
static void test_casting()
{
Tensor<int, 3> tensor(2,3,7);
int val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
tensor(i,j,k) = val;
val++;
}
}
}
TensorMap<Tensor<int, 3> > map(tensor);
int sum1 = f(map);
int sum2 = f(tensor);
VERIFY_IS_EQUAL(sum1, sum2);
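// 861 == 0 + 1 + ... + 41, the sum of the 2*3*7 == 42 sequential entries.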
VERIFY_IS_EQUAL(sum1, 861);
}
template<typename T>
static const T& add_const(T& value) {
return value;
}
static void test_0d_const_tensor()
{
Tensor<int, 0> scalar1;
Tensor<int, 0, RowMajor> scalar2;
TensorMap<const Tensor<int, 0> > scalar3(add_const(scalar1).data());
TensorMap<const Tensor<int, 0, RowMajor> > scalar4(add_const(scalar2).data());
scalar1() = 7;
scalar2() = 13;
VERIFY_IS_EQUAL(scalar1.rank(), 0);
VERIFY_IS_EQUAL(scalar1.size(), 1);
VERIFY_IS_EQUAL(scalar3(), 7);
VERIFY_IS_EQUAL(scalar4(), 13);
}
static void test_0d_const_tensor_map()
{
Tensor<int, 0> scalar1;
Tensor<int, 0, RowMajor> scalar2;
const TensorMap<Tensor<int, 0> > scalar3(scalar1.data());
const TensorMap<Tensor<int, 0, RowMajor> > scalar4(scalar2.data());
// Although TensorMap is constant, we still can write to the underlying
// storage, because we map over non-constant Tensor.
scalar3() = 7;
scalar4() = 13;
VERIFY_IS_EQUAL(scalar1(), 7);
VERIFY_IS_EQUAL(scalar2(), 13);
// Pointer to the underlying storage is also non-const.
scalar3.data()[0] = 8;
scalar4.data()[0] = 14;
VERIFY_IS_EQUAL(scalar1(), 8);
VERIFY_IS_EQUAL(scalar2(), 14);
}
EIGEN_DECLARE_TEST(cxx11_tensor_map)
{
CALL_SUBTEST(test_0d());
CALL_SUBTEST(test_1d());
CALL_SUBTEST(test_2d());
CALL_SUBTEST(test_3d());
CALL_SUBTEST(test_from_tensor());
CALL_SUBTEST(test_casting());
CALL_SUBTEST(test_0d_const_tensor());
CALL_SUBTEST(test_0d_const_tensor_map());
}

View File

@@ -0,0 +1,46 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_tanh()
{
Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.tanh();
for (int i = 0; i < 6; ++i) {
VERIFY_IS_APPROX(vec2(i), tanhf(vec1(i)));
}
}
static void test_sigmoid()
{
Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.sigmoid();
for (int i = 0; i < 6; ++i) {
VERIFY_IS_APPROX(vec2(i), 1.0f / (1.0f + std::exp(-vec1(i))));
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_math)
{
CALL_SUBTEST(test_tanh());
CALL_SUBTEST(test_sigmoid());
}

View File

@@ -0,0 +1,105 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
using Eigen::RowMajor;
template <typename DataType, int DataLayout, typename IndexType>
static void test_tanh_sycl(const Eigen::SyclDevice &sycl_device)
{
IndexType sizeDim1 = 4;
IndexType sizeDim2 = 4;
IndexType sizeDim3 = 1;
array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange);
Tensor<DataType, 3, DataLayout, IndexType> out_cpu(tensorRange);
in = in.random();
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(out.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu2(gpu_data2, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data1, in.data(),(in.size())*sizeof(DataType));
gpu2.device(sycl_device) = gpu1.tanh();
sycl_device.memcpyDeviceToHost(out.data(), gpu_data2,(out.size())*sizeof(DataType));
out_cpu=in.tanh();
for (int i = 0; i < in.size(); ++i) {
VERIFY_IS_APPROX(out(i), out_cpu(i));
}
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_sigmoid_sycl(const Eigen::SyclDevice &sycl_device)
{
IndexType sizeDim1 = 4;
IndexType sizeDim2 = 4;
IndexType sizeDim3 = 1;
array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange);
Tensor<DataType, 3, DataLayout, IndexType> out_cpu(tensorRange);
in = in.random();
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(out.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu2(gpu_data2, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data1, in.data(),(in.size())*sizeof(DataType));
gpu2.device(sycl_device) = gpu1.sigmoid();
sycl_device.memcpyDeviceToHost(out.data(), gpu_data2,(out.size())*sizeof(DataType));
out_cpu=in.sigmoid();
for (int i = 0; i < in.size(); ++i) {
VERIFY_IS_APPROX(out(i), out_cpu(i));
}
}
template<typename DataType, typename dev_Selector> void sycl_computing_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_tanh_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_tanh_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_sigmoid_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_sigmoid_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_math_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,53 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
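// Verifies that one expression can mix tensors whose index types differ:
// vec1 uses the default DenseIndex while vec2 uses a plain int.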
static void test_simple()
{
Tensor<float, 1, ColMajor> vec1(6);
Tensor<float, 1, ColMajor, int> vec2(6);
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;
vec1(2) = 15.0; vec2(2) = 2.0;
vec1(3) = 16.0; vec2(3) = 3.0;
vec1(4) = 23.0; vec2(4) = 4.0;
vec1(5) = 42.0; vec2(5) = 5.0;
float data3[6];
TensorMap<Tensor<float, 1, ColMajor>> vec3(data3, 6);
vec3 = vec1.sqrt();
float data4[6];
TensorMap<Tensor<float, 1, ColMajor, int>> vec4(data4, 6);
vec4 = vec2.square();
VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
VERIFY_IS_APPROX(vec3(1), sqrtf(8.0));
VERIFY_IS_APPROX(vec3(2), sqrtf(15.0));
VERIFY_IS_APPROX(vec3(3), sqrtf(16.0));
VERIFY_IS_APPROX(vec3(4), sqrtf(23.0));
VERIFY_IS_APPROX(vec3(5), sqrtf(42.0));
VERIFY_IS_APPROX(vec4(0), 0.0f);
VERIFY_IS_APPROX(vec4(1), 1.0f);
VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f);
VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f);
VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f);
VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f);
}
EIGEN_DECLARE_TEST(cxx11_tensor_mixed_indices)
{
CALL_SUBTEST(test_simple());
}

View File

@@ -0,0 +1,565 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<typename>
static void test_simple_reshape()
{
Tensor<float, 5> tensor1(2,3,1,7,1);
tensor1.setRandom();
Tensor<float, 3> tensor2(2,3,7);
Tensor<float, 2> tensor3(6,7);
Tensor<float, 2> tensor4(2,21);
Tensor<float, 3>::Dimensions dim1(2,3,7);
tensor2 = tensor1.reshape(dim1);
Tensor<float, 2>::Dimensions dim2(6,7);
tensor3 = tensor1.reshape(dim2);
Tensor<float, 2>::Dimensions dim3(2,21);
tensor4 = tensor1.reshape(dim1).reshape(dim3);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k));
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k));
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k));
}
}
}
}
template <typename>
static void test_static_reshape() {
#if defined(EIGEN_HAS_INDEX_LIST)
using Eigen::type2index;
Tensor<float, 5> tensor(2, 3, 1, 7, 1);
tensor.setRandom();
// New dimensions: [2, 3, 7]
Eigen::IndexList<type2index<2>, type2index<3>, type2index<7>> dim;
Tensor<float, 3> reshaped = tensor.reshape(static_cast<Eigen::DSizes<ptrdiff_t,3>>(dim));
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(tensor(i, j, 0, k, 0), reshaped(i, j, k));
}
}
}
#endif
}
template <typename>
static void test_reshape_in_expr() {
MatrixXf m1(2,3*5*7*11);
MatrixXf m2(3*5*7*11,13);
m1.setRandom();
m2.setRandom();
MatrixXf m3 = m1 * m2;
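// The contraction below, applied to reshaped 2-d views of the same data,
// must reproduce this matrix product.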
TensorMap<Tensor<float, 5>> tensor1(m1.data(), 2,3,5,7,11);
TensorMap<Tensor<float, 5>> tensor2(m2.data(), 3,5,7,11,13);
Tensor<float, 2>::Dimensions newDims1(2,3*5*7*11);
Tensor<float, 2>::Dimensions newDims2(3*5*7*11,13);
typedef Tensor<float, 1>::DimensionPair DimPair;
array<DimPair, 1> contract_along{{DimPair(1, 0)}};
Tensor<float, 2> tensor3(2,13);
tensor3 = tensor1.reshape(newDims1).contract(tensor2.reshape(newDims2), contract_along);
Map<MatrixXf> res(tensor3.data(), 2, 13);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 13; ++j) {
VERIFY_IS_APPROX(res(i,j), m3(i,j));
}
}
}
template<typename>
static void test_reshape_as_lvalue()
{
Tensor<float, 3> tensor(2,3,7);
tensor.setRandom();
Tensor<float, 2> tensor2d(6,7);
Tensor<float, 3>::Dimensions dim(2,3,7);
tensor2d.reshape(dim) = tensor;
float scratch[2*3*1*7*1];
TensorMap<Tensor<float, 5>> tensor5d(scratch, 2,3,1,7,1);
tensor5d.reshape(dim).device(Eigen::DefaultDevice()) = tensor;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k));
VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k));
}
}
}
}
template<typename T, int DataLayout>
static void test_simple_slice()
{
Tensor<T, 5, DataLayout> tensor(2,3,5,7,11);
tensor.setRandom();
Tensor<T, 5, DataLayout> slice1(1,1,1,1,1);
Eigen::DSizes<ptrdiff_t, 5> indices(1,2,3,4,5);
Eigen::DSizes<ptrdiff_t, 5> sizes(1,1,1,1,1);
slice1 = tensor.slice(indices, sizes);
VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
Tensor<T, 5, DataLayout> slice2(1,1,2,2,3);
Eigen::DSizes<ptrdiff_t, 5> indices2(1,1,3,4,5);
Eigen::DSizes<ptrdiff_t, 5> sizes2(1,1,2,2,3);
slice2 = tensor.slice(indices2, sizes2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
}
}
}
}
template<typename T>
static void test_const_slice()
{
const T b[1] = {42};
TensorMap<Tensor<const T, 1> > m(b, 1);
DSizes<DenseIndex, 1> offsets;
offsets[0] = 0;
TensorRef<Tensor<const T, 1> > slice_ref(m.slice(offsets, m.dimensions()));
VERIFY_IS_EQUAL(slice_ref(0), 42);
}
template<typename T, int DataLayout>
static void test_slice_in_expr() {
typedef Matrix<T, Dynamic, Dynamic, DataLayout> Mtx;
Mtx m1(7,7);
Mtx m2(3,3);
m1.setRandom();
m2.setRandom();
Mtx m3 = m1.block(1, 2, 3, 3) * m2.block(0, 2, 3, 1);
TensorMap<Tensor<T, 2, DataLayout>> tensor1(m1.data(), 7, 7);
TensorMap<Tensor<T, 2, DataLayout>> tensor2(m2.data(), 3, 3);
Tensor<T, 2, DataLayout> tensor3(3,1);
typedef typename Tensor<T, 1>::DimensionPair DimPair;
array<DimPair, 1> contract_along{{DimPair(1, 0)}};
Eigen::DSizes<ptrdiff_t, 2> indices1(1,2);
Eigen::DSizes<ptrdiff_t, 2> sizes1(3,3);
Eigen::DSizes<ptrdiff_t, 2> indices2(0,2);
Eigen::DSizes<ptrdiff_t, 2> sizes2(3,1);
tensor3 = tensor1.slice(indices1, sizes1).contract(tensor2.slice(indices2, sizes2), contract_along);
Map<Mtx> res(tensor3.data(), 3, 1);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 1; ++j) {
VERIFY_IS_APPROX(res(i,j), m3(i,j));
}
}
// Take an arbitrary slice of an arbitrarily sized tensor.
TensorMap<Tensor<const T, 2, DataLayout>> tensor4(m1.data(), 7, 7);
Tensor<T, 1, DataLayout> tensor6 = tensor4.reshape(DSizes<ptrdiff_t, 1>(7*7)).exp().slice(DSizes<ptrdiff_t, 1>(0), DSizes<ptrdiff_t, 1>(35));
for (int i = 0; i < 35; ++i) {
VERIFY_IS_APPROX(tensor6(i), expf(tensor4.data()[i]));
}
}
template<typename T, int DataLayout>
static void test_slice_as_lvalue()
{
Tensor<T, 3, DataLayout> tensor1(2,2,7);
tensor1.setRandom();
Tensor<T, 3, DataLayout> tensor2(2,2,7);
tensor2.setRandom();
Tensor<T, 3, DataLayout> tensor3(4,3,5);
tensor3.setRandom();
Tensor<T, 3, DataLayout> tensor4(4,3,2);
tensor4.setRandom();
Tensor<T, 3, DataLayout> tensor5(10,13,12);
tensor5.setRandom();
Tensor<T, 3, DataLayout> result(4,5,7);
Eigen::DSizes<ptrdiff_t, 3> sizes12(2,2,7);
Eigen::DSizes<ptrdiff_t, 3> first_slice(0,0,0);
result.slice(first_slice, sizes12) = tensor1;
Eigen::DSizes<ptrdiff_t, 3> second_slice(2,0,0);
result.slice(second_slice, sizes12).device(Eigen::DefaultDevice()) = tensor2;
Eigen::DSizes<ptrdiff_t, 3> sizes3(4,3,5);
Eigen::DSizes<ptrdiff_t, 3> third_slice(0,2,0);
result.slice(third_slice, sizes3) = tensor3;
Eigen::DSizes<ptrdiff_t, 3> sizes4(4,3,2);
Eigen::DSizes<ptrdiff_t, 3> fourth_slice(0,2,5);
result.slice(fourth_slice, sizes4) = tensor4;
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 7; ++k) {
for (int i = 0; i < 2; ++i) {
VERIFY_IS_EQUAL(result(i,j,k), tensor1(i,j,k));
VERIFY_IS_EQUAL(result(i+2,j,k), tensor2(i,j,k));
}
}
}
for (int i = 0; i < 4; ++i) {
for (int j = 2; j < 5; ++j) {
for (int k = 0; k < 5; ++k) {
VERIFY_IS_EQUAL(result(i,j,k), tensor3(i,j-2,k));
}
for (int k = 5; k < 7; ++k) {
VERIFY_IS_EQUAL(result(i,j,k), tensor4(i,j-2,k-5));
}
}
}
Eigen::DSizes<ptrdiff_t, 3> sizes5(4,5,7);
Eigen::DSizes<ptrdiff_t, 3> fifth_slice(0,0,0);
result.slice(fifth_slice, sizes5) = tensor5.slice(fifth_slice, sizes5);
for (int i = 0; i < 4; ++i) {
for (int j = 2; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(result(i,j,k), tensor5(i,j,k));
}
}
}
}
template<typename T, int DataLayout>
static void test_slice_raw_data()
{
Tensor<T, 4, DataLayout> tensor(3,5,7,11);
tensor.setRandom();
Eigen::DSizes<ptrdiff_t, 4> offsets(1,2,3,4);
Eigen::DSizes<ptrdiff_t, 4> extents(1,1,1,1);
typedef TensorEvaluator<decltype(tensor.slice(offsets, extents)), DefaultDevice> SliceEvaluator;
auto slice1 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice1.dimensions().TotalSize(), 1);
VERIFY_IS_EQUAL(slice1.data()[0], tensor(1,2,3,4));
if (DataLayout == ColMajor) {
extents = Eigen::DSizes<ptrdiff_t, 4>(2,1,1,1);
auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2);
VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4));
VERIFY_IS_EQUAL(slice2.data()[1], tensor(2,2,3,4));
} else {
extents = Eigen::DSizes<ptrdiff_t, 4>(1,1,1,2);
auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2);
VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4));
VERIFY_IS_EQUAL(slice2.data()[1], tensor(1,2,3,5));
}
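// A slice that is not contiguous in memory cannot expose a direct pointer
// into the original buffer, so the evaluator's data() is expected to be null.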
extents = Eigen::DSizes<ptrdiff_t, 4>(1,2,1,1);
auto slice3 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice3.dimensions().TotalSize(), 2);
VERIFY_IS_EQUAL(slice3.data(), static_cast<T*>(0));
if (DataLayout == ColMajor) {
offsets = Eigen::DSizes<ptrdiff_t, 4>(0,2,3,4);
extents = Eigen::DSizes<ptrdiff_t, 4>(3,2,1,1);
auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 6);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 2; ++j) {
VERIFY_IS_EQUAL(slice4.data()[i+3*j], tensor(i,2+j,3,4));
}
}
} else {
offsets = Eigen::DSizes<ptrdiff_t, 4>(1,2,3,0);
extents = Eigen::DSizes<ptrdiff_t, 4>(1,1,2,11);
auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 22);
for (int l = 0; l < 11; ++l) {
for (int k = 0; k < 2; ++k) {
VERIFY_IS_EQUAL(slice4.data()[l+11*k], tensor(1,2,3+k,l));
}
}
}
if (DataLayout == ColMajor) {
offsets = Eigen::DSizes<ptrdiff_t, 4>(0,0,0,4);
extents = Eigen::DSizes<ptrdiff_t, 4>(3,5,7,2);
auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 210);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 2; ++l) {
int slice_index = i + 3 * (j + 5 * (k + 7 * l));
VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i,j,k,l+4));
}
}
}
}
} else {
offsets = Eigen::DSizes<ptrdiff_t, 4>(1,0,0,0);
extents = Eigen::DSizes<ptrdiff_t, 4>(2,5,7,11);
auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 770);
for (int l = 0; l < 11; ++l) {
for (int k = 0; k < 7; ++k) {
for (int j = 0; j < 5; ++j) {
for (int i = 0; i < 2; ++i) {
int slice_index = l + 11 * (k + 7 * (j + 5 * i));
VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i+1,j,k,l));
}
}
}
}
}
offsets = Eigen::DSizes<ptrdiff_t, 4>(0,0,0,0);
extents = Eigen::DSizes<ptrdiff_t, 4>(3,5,7,11);
auto slice6 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice6.dimensions().TotalSize(), 3*5*7*11);
VERIFY_IS_EQUAL(slice6.data(), tensor.data());
}
template<typename T, int DataLayout>
static void test_strided_slice()
{
typedef Tensor<T, 5, DataLayout> Tensor5f;
typedef Eigen::DSizes<Eigen::DenseIndex, 5> Index5;
typedef Tensor<T, 2, DataLayout> Tensor2f;
typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
Tensor<T, 5, DataLayout> tensor(2,3,5,7,11);
Tensor<T, 2, DataLayout> tensor2(7,11);
tensor.setRandom();
tensor2.setRandom();
if (true) {
Tensor2f slice(2,3);
Index2 strides(-2,-1);
Index2 indicesStart(5,7);
Index2 indicesStop(0,4);
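// Negative strides read backwards from indicesStart toward indicesStop
// (stop is exclusive), so slice(j,k) == tensor2(5 - 2*j, 7 - k).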
slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
VERIFY_IS_EQUAL(slice(j,k), tensor2(5-2*j,7-k));
}
}
}
if(true) {
Tensor2f slice(0,1);
Index2 strides(1,1);
Index2 indicesStart(5,4);
Index2 indicesStop(5,5);
slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
}
if(true) { // test clamped degenerate intervals
Tensor2f slice(7,11);
Index2 strides(1,-1);
Index2 indicesStart(-3,20); // should become 0,10
Index2 indicesStop(20,-11); // should become 11, -1
slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
for (int j = 0; j < 7; ++j) {
for (int k = 0; k < 11; ++k) {
VERIFY_IS_EQUAL(slice(j,k), tensor2(j,10-k));
}
}
}
if(true) {
Tensor5f slice1(1,1,1,1,1);
Eigen::DSizes<Eigen::DenseIndex, 5> indicesStart(1, 2, 3, 4, 5);
Eigen::DSizes<Eigen::DenseIndex, 5> indicesStop(2, 3, 4, 5, 6);
Eigen::DSizes<Eigen::DenseIndex, 5> strides(1, 1, 1, 1, 1);
slice1 = tensor.stridedSlice(indicesStart, indicesStop, strides);
VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
}
if(true) {
Tensor5f slice(1,1,2,2,3);
Index5 start(1, 1, 3, 4, 5);
Index5 stop(2, 2, 5, 6, 8);
Index5 strides(1, 1, 1, 1, 1);
slice = tensor.stridedSlice(start, stop, strides);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
}
}
}
}
if(true) {
Tensor5f slice(1,1,2,2,3);
Index5 strides3(1, 1, -2, 1, -1);
Index5 indices3Start(1, 1, 4, 4, 7);
Index5 indices3Stop(2, 2, 0, 6, 4);
slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,4-2*i,4+j,7-k));
}
}
}
}
if(false) { // tests degenerate interval
Tensor5f slice(1,1,2,2,3);
Index5 strides3(1, 1, 2, 1, 1);
Index5 indices3Start(1, 1, 4, 4, 7);
Index5 indices3Stop(2, 2, 0, 6, 4);
slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3);
}
}
template<typename T, int DataLayout>
static void test_strided_slice_write()
{
typedef Tensor<T, 2, DataLayout> Tensor2f;
typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
Tensor<T, 2, DataLayout> tensor(7,11),tensor2(7,11);
tensor.setRandom();
tensor2=tensor;
Tensor2f slice(2,3);
slice.setRandom();
Index2 strides(1,1);
Index2 indicesStart(3,4);
Index2 indicesStop(5,7);
Index2 lengths(2,3);
tensor.slice(indicesStart,lengths)=slice;
tensor2.stridedSlice(indicesStart,indicesStop,strides)=slice;
for(int i=0;i<7;i++) for(int j=0;j<11;j++){
VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j));
}
}
template<typename T, int DataLayout>
static void test_composition()
{
Eigen::Tensor<T, 2, DataLayout> matrix(7, 11);
matrix.setRandom();
const DSizes<ptrdiff_t, 3> newDims(1, 1, 11);
Eigen::Tensor<T, 3, DataLayout> tensor =
matrix.slice(DSizes<ptrdiff_t, 2>(2, 0), DSizes<ptrdiff_t, 2>(1, 11)).reshape(newDims);
VERIFY_IS_EQUAL(tensor.dimensions().TotalSize(), 11);
VERIFY_IS_EQUAL(tensor.dimension(0), 1);
VERIFY_IS_EQUAL(tensor.dimension(1), 1);
VERIFY_IS_EQUAL(tensor.dimension(2), 11);
for (int i = 0; i < 11; ++i) {
VERIFY_IS_EQUAL(tensor(0,0,i), matrix(2,i));
}
}
template<typename T, int DataLayout>
static void test_empty_slice()
{
Tensor<T, 3, DataLayout> tensor(2,3,5);
tensor.setRandom();
Tensor<T, 3, DataLayout> copy = tensor;
// empty size in first dimension
Eigen::DSizes<ptrdiff_t, 3> indices1(1,2,3);
Eigen::DSizes<ptrdiff_t, 3> sizes1(0,1,2);
Tensor<T, 3, DataLayout> slice1(0,1,2);
slice1.setRandom();
tensor.slice(indices1, sizes1) = slice1;
// empty size in second dimension
Eigen::DSizes<ptrdiff_t, 3> indices2(1,2,3);
Eigen::DSizes<ptrdiff_t, 3> sizes2(1,0,2);
Tensor<T, 3, DataLayout> slice2(1,0,2);
slice2.setRandom();
tensor.slice(indices2, sizes2) = slice2;
// empty size in third dimension
Eigen::DSizes<ptrdiff_t, 3> indices3(1,2,3);
Eigen::DSizes<ptrdiff_t, 3> sizes3(1,1,0);
Tensor<T, 3, DataLayout> slice3(1,1,0);
slice3.setRandom();
tensor.slice(indices3, sizes3) = slice3;
// empty size in first and second dimension
Eigen::DSizes<ptrdiff_t, 3> indices4(1,2,3);
Eigen::DSizes<ptrdiff_t, 3> sizes4(0,0,2);
Tensor<T, 3, DataLayout> slice4(0,0,2);
slice4.setRandom();
tensor.slice(indices4, sizes4) = slice4;
// empty size in second and third dimension
Eigen::DSizes<ptrdiff_t, 3> indices5(1,2,3);
Eigen::DSizes<ptrdiff_t, 3> sizes5(1,0,0);
Tensor<T, 3, DataLayout> slice5(1,0,0);
slice5.setRandom();
tensor.slice(indices5, sizes5) = slice5;
// empty size in all dimensions
Eigen::DSizes<ptrdiff_t, 3> indices6(1,2,3);
Eigen::DSizes<ptrdiff_t, 3> sizes6(0,0,0);
Tensor<T, 3, DataLayout> slice6(0,0,0);
slice6.setRandom();
tensor.slice(indices6, sizes6) = slice6;
// none of these operations should change the tensor's components
// because all of the rvalue slices have at least one zero dimension
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
VERIFY_IS_EQUAL(tensor(i,j,k), copy(i,j,k));
}
}
}
}
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTESTS_TYPES_LAYOUTS(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, RowMajor>()))
EIGEN_DECLARE_TEST(cxx11_tensor_morphing)
{
CALL_SUBTEST_1(test_simple_reshape<void>());
CALL_SUBTEST_1(test_static_reshape<void>());
CALL_SUBTEST_1(test_reshape_as_lvalue<void>());
CALL_SUBTEST_1(test_reshape_in_expr<void>());
CALL_SUBTEST_1(test_const_slice<float>());
CALL_SUBTESTS_TYPES_LAYOUTS(2, test_simple_slice);
CALL_SUBTESTS_TYPES_LAYOUTS(3, test_slice_as_lvalue);
CALL_SUBTESTS_TYPES_LAYOUTS(4, test_slice_raw_data);
CALL_SUBTESTS_TYPES_LAYOUTS(5, test_strided_slice_write);
CALL_SUBTESTS_TYPES_LAYOUTS(6, test_strided_slice);
CALL_SUBTESTS_TYPES_LAYOUTS(7, test_composition);
}

View File

@@ -0,0 +1,386 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_reshape(const Eigen::SyclDevice& sycl_device)
{
typename Tensor<DataType, 5 ,DataLayout, IndexType>::Dimensions dim1(2,3,1,7,1);
typename Tensor<DataType, 3 ,DataLayout, IndexType>::Dimensions dim2(2,3,7);
typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim3(6,7);
typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim4(2,21);
Tensor<DataType, 5, DataLayout, IndexType> tensor1(dim1);
Tensor<DataType, 3, DataLayout, IndexType> tensor2(dim2);
Tensor<DataType, 2, DataLayout, IndexType> tensor3(dim3);
Tensor<DataType, 2, DataLayout, IndexType> tensor4(dim4);
tensor1.setRandom();
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType)));
DataType* gpu_data4 = static_cast<DataType*>(sycl_device.allocate(tensor4.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, dim1);
TensorMap<Tensor<DataType, 3,DataLayout, IndexType>> gpu2(gpu_data2, dim2);
TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu3(gpu_data3, dim3);
TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu4(gpu_data4, dim4);
sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
gpu2.device(sycl_device)=gpu1.reshape(dim2);
sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor1.size())*sizeof(DataType));
gpu3.device(sycl_device)=gpu1.reshape(dim3);
sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType));
gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4);
sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType));
for (IndexType i = 0; i < 2; ++i){
for (IndexType j = 0; j < 3; ++j){
for (IndexType k = 0; k < 7; ++k){
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); // holds for both layouts
if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k)); ///ColMajor
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k)); ///ColMajor
}
else{
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k)); /// RowMajor
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k)); /// RowMajor
}
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
sycl_device.deallocate(gpu_data3);
sycl_device.deallocate(gpu_data4);
}
template<typename DataType, int DataLayout, typename IndexType>
static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device)
{
typename Tensor<DataType, 3, DataLayout, IndexType>::Dimensions dim1(2,3,7);
typename Tensor<DataType, 2, DataLayout, IndexType>::Dimensions dim2(6,7);
typename Tensor<DataType, 5, DataLayout, IndexType>::Dimensions dim3(2,3,1,7,1);
Tensor<DataType, 3, DataLayout, IndexType> tensor(dim1);
Tensor<DataType, 2, DataLayout, IndexType> tensor2d(dim2);
Tensor<DataType, 5, DataLayout, IndexType> tensor5d(dim3);
tensor.setRandom();
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2d.size()*sizeof(DataType)));
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor5d.size()*sizeof(DataType)));
TensorMap< Tensor<DataType, 3, DataLayout, IndexType> > gpu1(gpu_data1, dim1);
TensorMap< Tensor<DataType, 2, DataLayout, IndexType> > gpu2(gpu_data2, dim2);
TensorMap< Tensor<DataType, 5, DataLayout, IndexType> > gpu3(gpu_data3, dim3);
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
gpu2.reshape(dim1).device(sycl_device)=gpu1;
sycl_device.memcpyDeviceToHost(tensor2d.data(), gpu_data2,(tensor2d.size())*sizeof(DataType));
gpu3.reshape(dim1).device(sycl_device)=gpu1;
sycl_device.memcpyDeviceToHost(tensor5d.data(), gpu_data3,(tensor5d.size())*sizeof(DataType));
for (IndexType i = 0; i < 2; ++i){
for (IndexType j = 0; j < 3; ++j){
for (IndexType k = 0; k < 7; ++k){
VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k));
if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k)); ///ColMajor
}
else{
VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k)); /// RowMajor
}
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
sycl_device.deallocate(gpu_data3);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
IndexType sizeDim5 = 11;
array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 5,DataLayout, IndexType> tensor(tensorRange);
tensor.setRandom();
array<IndexType, 5> slice1_range ={{1, 1, 1, 1, 1}};
Tensor<DataType, 5,DataLayout, IndexType> slice1(slice1_range);
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(slice1.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu2(gpu_data2, slice1_range);
Eigen::DSizes<IndexType, 5> indices(1,2,3,4,5);
Eigen::DSizes<IndexType, 5> sizes(1,1,1,1,1);
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
gpu2.device(sycl_device)=gpu1.slice(indices, sizes);
sycl_device.memcpyDeviceToHost(slice1.data(), gpu_data2,(slice1.size())*sizeof(DataType));
VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
array<IndexType, 5> slice2_range ={{1,1,2,2,3}};
Tensor<DataType, 5,DataLayout, IndexType> slice2(slice2_range);
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice2.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu3(gpu_data3, slice2_range);
Eigen::DSizes<IndexType, 5> indices2(1,1,3,4,5);
Eigen::DSizes<IndexType, 5> sizes2(1,1,2,2,3);
gpu3.device(sycl_device)=gpu1.slice(indices2, sizes2);
sycl_device.memcpyDeviceToHost(slice2.data(), gpu_data3,(slice2.size())*sizeof(DataType));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 2; ++j) {
for (IndexType k = 0; k < 3; ++k) {
VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
sycl_device.deallocate(gpu_data3);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_strided_slice_as_rhs_sycl(const Eigen::SyclDevice &sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
IndexType sizeDim5 = 11;
typedef Eigen::DSizes<IndexType, 5> Index5;
Index5 strides(1L,1L,1L,1L,1L);
Index5 indicesStart(1L,2L,3L,4L,5L);
Index5 indicesStop(2L,3L,4L,5L,6L);
Index5 lengths(1L,1L,1L,1L,1L);
array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
Tensor<DataType, 5, DataLayout, IndexType> tensor(tensorRange);
tensor.setRandom();
array<IndexType, 5> slice1_range ={{1, 1, 1, 1, 1}};
Tensor<DataType, 5,DataLayout, IndexType> slice1(slice1_range);
Tensor<DataType, 5, DataLayout, IndexType> slice_stride1(slice1_range);
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(slice1.size()*sizeof(DataType)));
DataType* gpu_data_stride2 = static_cast<DataType*>(sycl_device.allocate(slice_stride1.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu2(gpu_data2, slice1_range);
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu_stride2(gpu_data_stride2, slice1_range);
Eigen::DSizes<IndexType, 5> indices(1,2,3,4,5);
Eigen::DSizes<IndexType, 5> sizes(1,1,1,1,1);
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
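// With unit strides, stridedSlice(indicesStart, indicesStop, strides) selects the
// same region as slice(indices, sizes), since sizes == indicesStop - indicesStart.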
gpu2.device(sycl_device)=gpu1.slice(indices, sizes);
sycl_device.memcpyDeviceToHost(slice1.data(), gpu_data2,(slice1.size())*sizeof(DataType));
gpu_stride2.device(sycl_device)=gpu1.stridedSlice(indicesStart,indicesStop,strides);
sycl_device.memcpyDeviceToHost(slice_stride1.data(), gpu_data_stride2,(slice_stride1.size())*sizeof(DataType));
VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
VERIFY_IS_EQUAL(slice_stride1(0,0,0,0,0), tensor(1,2,3,4,5));
array<IndexType, 5> slice2_range ={{1,1,2,2,3}};
Tensor<DataType, 5,DataLayout, IndexType> slice2(slice2_range);
Tensor<DataType, 5, DataLayout, IndexType> strideSlice2(slice2_range);
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice2.size()*sizeof(DataType)));
DataType* gpu_data_stride3 = static_cast<DataType*>(sycl_device.allocate(strideSlice2.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu3(gpu_data3, slice2_range);
TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu_stride3(gpu_data_stride3, slice2_range);
Eigen::DSizes<IndexType, 5> indices2(1,1,3,4,5);
Eigen::DSizes<IndexType, 5> sizes2(1,1,2,2,3);
Index5 strides2(1L,1L,1L,1L,1L);
Index5 indicesStart2(1L,1L,3L,4L,5L);
Index5 indicesStop2(2L,2L,5L,6L,8L);
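// Second region: again indicesStop2 = indicesStart2 + sizes2 in every dimension,
// so the strided and plain slices must agree.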
gpu3.device(sycl_device)=gpu1.slice(indices2, sizes2);
sycl_device.memcpyDeviceToHost(slice2.data(), gpu_data3,(slice2.size())*sizeof(DataType));
gpu_stride3.device(sycl_device)=gpu1.stridedSlice(indicesStart2,indicesStop2,strides2);
sycl_device.memcpyDeviceToHost(strideSlice2.data(), gpu_data_stride3,(strideSlice2.size())*sizeof(DataType));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 2; ++j) {
for (IndexType k = 0; k < 3; ++k) {
VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
VERIFY_IS_EQUAL(strideSlice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
sycl_device.deallocate(gpu_data3);
}
template<typename DataType, int DataLayout, typename IndexType>
static void test_strided_slice_write_sycl(const Eigen::SyclDevice& sycl_device)
{
typedef Tensor<DataType, 2, DataLayout, IndexType> Tensor2f;
typedef Eigen::DSizes<IndexType, 2> Index2;
IndexType sizeDim1 = 7L;
IndexType sizeDim2 = 11L;
array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
Tensor<DataType, 2, DataLayout, IndexType> tensor(tensorRange), tensor2(tensorRange);
IndexType sliceDim1 = 2;
IndexType sliceDim2 = 3;
array<IndexType, 2> sliceRange = {{sliceDim1, sliceDim2}};
Tensor2f slice(sliceRange);
Index2 strides(1L,1L);
Index2 indicesStart(3L,4L);
Index2 indicesStop(5L,7L);
Index2 lengths(2L,3L);
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, tensorRange);
TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu3(gpu_data3, sliceRange);
tensor.setRandom();
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
gpu2.device(sycl_device)=gpu1;
slice.setRandom();
sycl_device.memcpyHostToDevice(gpu_data3, slice.data(),(slice.size())*sizeof(DataType));
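// Write the same 2x3 block at offset (3,4) through slice() into gpu1 and through
// stridedSlice() into gpu2; the two full tensors must be identical afterwards.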
gpu1.slice(indicesStart,lengths).device(sycl_device)=gpu3;
gpu2.stridedSlice(indicesStart,indicesStop,strides).device(sycl_device)=gpu3;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data1,(tensor.size())*sizeof(DataType));
sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j));
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
sycl_device.deallocate(gpu_data3);
}
template <typename OutIndex, typename DSizes>
Eigen::array<OutIndex, DSizes::count> To32BitDims(const DSizes& in) {
Eigen::array<OutIndex, DSizes::count> out;
for (int i = 0; i < DSizes::count; ++i) {
out[i] = in[i];
}
return out;
}
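// Usage (as in run_eigen below): To32BitDims<ConvertedIndexType>(t.dimensions())
// narrows 64-bit dimension sizes so the same buffer can be re-mapped with
// 32-bit indices.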
template <class DataType, int DataLayout, typename IndexType, typename ConvertedIndexType>
int run_eigen(const SyclDevice& sycl_device) {
using TensorI64 = Tensor<DataType, 5, DataLayout, IndexType>;
using TensorI32 = Tensor<DataType, 5, DataLayout, ConvertedIndexType>;
using TensorMI64 = TensorMap<TensorI64>;
using TensorMI32 = TensorMap<TensorI32>;
Eigen::array<IndexType, 5> tensor_range{{4, 1, 1, 1, 6}};
Eigen::array<IndexType, 5> slice_range{{4, 1, 1, 1, 3}};
TensorI64 out_tensor_gpu(tensor_range);
TensorI64 out_tensor_cpu(tensor_range);
out_tensor_cpu.setRandom();
TensorI64 sub_tensor(slice_range);
sub_tensor.setRandom();
DataType* out_gpu_data = static_cast<DataType*>(sycl_device.allocate(out_tensor_cpu.size() * sizeof(DataType)));
DataType* sub_gpu_data = static_cast<DataType*>(sycl_device.allocate(sub_tensor.size() * sizeof(DataType)));
TensorMI64 out_gpu(out_gpu_data, tensor_range);
TensorMI64 sub_gpu(sub_gpu_data, slice_range);
sycl_device.memcpyHostToDevice(out_gpu_data, out_tensor_cpu.data(), out_tensor_cpu.size() * sizeof(DataType));
sycl_device.memcpyHostToDevice(sub_gpu_data, sub_tensor.data(), sub_tensor.size() * sizeof(DataType));
Eigen::array<ConvertedIndexType, 5> slice_offset_32{{0, 0, 0, 0, 3}};
Eigen::array<ConvertedIndexType, 5> slice_range_32{{4, 1, 1, 1, 3}};
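// Re-map the existing host and device buffers with 32-bit indices and run the
// same slice assignment on both; the results are compared element-wise below.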
TensorMI32 out_cpu_32(out_tensor_cpu.data(), To32BitDims<ConvertedIndexType>(out_tensor_cpu.dimensions()));
TensorMI32 sub_cpu_32(sub_tensor.data(), To32BitDims<ConvertedIndexType>(sub_tensor.dimensions()));
TensorMI32 out_gpu_32(out_gpu.data(), To32BitDims<ConvertedIndexType>(out_gpu.dimensions()));
TensorMI32 sub_gpu_32(sub_gpu.data(), To32BitDims<ConvertedIndexType>(sub_gpu.dimensions()));
out_gpu_32.slice(slice_offset_32, slice_range_32).device(sycl_device) = sub_gpu_32;
out_cpu_32.slice(slice_offset_32, slice_range_32) = sub_cpu_32;
sycl_device.memcpyDeviceToHost(out_tensor_gpu.data(), out_gpu_data, out_tensor_cpu.size() * sizeof(DataType));
int has_err = 0;
for (IndexType i = 0; i < out_tensor_cpu.size(); ++i) {
auto exp = out_tensor_cpu(i);
auto val = out_tensor_gpu(i);
if (val != exp) {
std::cout << "#" << i << " got " << val << " but expected " << exp << std::endl;
has_err = 1;
}
}
sycl_device.deallocate(out_gpu_data);
sycl_device.deallocate(sub_gpu_data);
return has_err;
}
template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_slice<DataType, RowMajor, int64_t>(sycl_device);
test_simple_slice<DataType, ColMajor, int64_t>(sycl_device);
test_simple_reshape<DataType, RowMajor, int64_t>(sycl_device);
test_simple_reshape<DataType, ColMajor, int64_t>(sycl_device);
test_reshape_as_lvalue<DataType, RowMajor, int64_t>(sycl_device);
test_reshape_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
test_strided_slice_write_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_strided_slice_write_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_strided_slice_as_rhs_sycl<DataType, ColMajor, int64_t>(sycl_device);
test_strided_slice_as_rhs_sycl<DataType, RowMajor, int64_t>(sycl_device);
run_eigen<float, RowMajor, long, int>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_morphing_sycl)
{
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_morphing_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,76 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2017 Viktor Csomor <viktor.csomor@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
#include <utility>
using Eigen::Tensor;
using Eigen::RowMajor;
static void calc_indices(int i, int& x, int& y, int& z)
{
x = i / 4;
y = (i % 4) / 2;
z = i % 2;
}
static void test_move()
{
int x;
int y;
int z;
Tensor<int,3> tensor1(2, 2, 2);
Tensor<int,3,RowMajor> tensor2(2, 2, 2);
for (int i = 0; i < 8; i++)
{
calc_indices(i, x, y, z);
tensor1(x,y,z) = i;
tensor2(x,y,z) = 2 * i;
}
// Invokes the move constructor.
Tensor<int,3> moved_tensor1 = std::move(tensor1);
Tensor<int,3,RowMajor> moved_tensor2 = std::move(tensor2);
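// A moved-from Tensor is left empty (size 0); the checks below confirm the
// storage was transferred rather than copied.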
VERIFY_IS_EQUAL(tensor1.size(), 0);
VERIFY_IS_EQUAL(tensor2.size(), 0);
for (int i = 0; i < 8; i++)
{
calc_indices(i, x, y, z);
VERIFY_IS_EQUAL(moved_tensor1(x,y,z), i);
VERIFY_IS_EQUAL(moved_tensor2(x,y,z), 2 * i);
}
Tensor<int,3> moved_tensor3(2,2,2);
Tensor<int,3,RowMajor> moved_tensor4(2,2,2);
moved_tensor3.setZero();
moved_tensor4.setZero();
// Invokes the move assignment operator.
moved_tensor3 = std::move(moved_tensor1);
moved_tensor4 = std::move(moved_tensor2);
for (int i = 0; i < 8; i++)
{
calc_indices(i, x, y, z);
VERIFY_IS_EQUAL(moved_tensor3(x,y,z), i);
VERIFY_IS_EQUAL(moved_tensor4(x,y,z), 2 * i);
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_move)
{
CALL_SUBTEST(test_move());
}

View File

@@ -0,0 +1,64 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Vijay Vasudevan <vrv@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include <atomic>
#include <stdlib.h>
#include "main.h"
#include <Eigen/CXX11/Tensor>
static void test_notification_single()
{
ThreadPool thread_pool(1);
std::atomic<int> counter(0);
Eigen::Notification n;
auto func = [&n, &counter](){ n.Wait(); ++counter;};
thread_pool.Schedule(func);
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
// The thread should be waiting for the notification.
VERIFY_IS_EQUAL(counter, 0);
// Unblock the thread
n.Notify();
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
// Verify the counter has been incremented
VERIFY_IS_EQUAL(counter, 1);
}
// Like test_notification_single() but enqueues multiple tasks to
// validate that every waiting task is unblocked by Notify().
static void test_notification_multiple()
{
ThreadPool thread_pool(1);
std::atomic<int> counter(0);
Eigen::Notification n;
auto func = [&n, &counter](){ n.Wait(); ++counter;};
thread_pool.Schedule(func);
thread_pool.Schedule(func);
thread_pool.Schedule(func);
thread_pool.Schedule(func);
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
VERIFY_IS_EQUAL(counter, 0);
n.Notify();
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
VERIFY_IS_EQUAL(counter, 4);
}
EIGEN_DECLARE_TEST(cxx11_tensor_notification)
{
CALL_SUBTEST(test_notification_single());
CALL_SUBTEST(test_notification_multiple());
}

View File

@@ -0,0 +1,103 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::TensorMap;
static void test_additions()
{
Tensor<std::complex<float>, 1> data1(3);
Tensor<std::complex<float>, 1> data2(3);
for (int i = 0; i < 3; ++i) {
data1(i) = std::complex<float>(i, -i);
data2(i) = std::complex<float>(i, 7 * i);
}
Tensor<std::complex<float>, 1> sum = data1 + data2;
for (int i = 0; i < 3; ++i) {
VERIFY_IS_EQUAL(sum(i), std::complex<float>(2*i, 6*i));
}
}
static void test_abs()
{
Tensor<std::complex<float>, 1> data1(3);
Tensor<std::complex<double>, 1> data2(3);
data1.setRandom();
data2.setRandom();
Tensor<float, 1> abs1 = data1.abs();
Tensor<double, 1> abs2 = data2.abs();
for (int i = 0; i < 3; ++i) {
VERIFY_IS_APPROX(abs1(i), std::abs(data1(i)));
VERIFY_IS_APPROX(abs2(i), std::abs(data2(i)));
}
}
static void test_conjugate()
{
Tensor<std::complex<float>, 1> data1(3);
Tensor<std::complex<double>, 1> data2(3);
Tensor<int, 1> data3(3);
data1.setRandom();
data2.setRandom();
data3.setRandom();
Tensor<std::complex<float>, 1> conj1 = data1.conjugate();
Tensor<std::complex<double>, 1> conj2 = data2.conjugate();
Tensor<int, 1> conj3 = data3.conjugate();
for (int i = 0; i < 3; ++i) {
VERIFY_IS_APPROX(conj1(i), std::conj(data1(i)));
VERIFY_IS_APPROX(conj2(i), std::conj(data2(i)));
VERIFY_IS_APPROX(conj3(i), data3(i));
}
}
static void test_contractions()
{
Tensor<std::complex<float>, 4> t_left(30, 50, 8, 31);
Tensor<std::complex<float>, 5> t_right(8, 31, 7, 20, 10);
Tensor<std::complex<float>, 5> t_result(30, 50, 7, 20, 10);
t_left.setRandom();
t_right.setRandom();
typedef Map<Matrix<std::complex<float>, Dynamic, Dynamic>> MapXcf;
MapXcf m_left(t_left.data(), 1500, 248);
MapXcf m_right(t_right.data(), 248, 1400);
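// Flattened views of the tensors: (30*50) x (8*31) times (8*31) x (7*20*10).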
Matrix<std::complex<float>, Dynamic, Dynamic> m_result(1500, 1400);
// This contraction should be equivalent to a regular matrix multiplication
typedef Tensor<float, 1>::DimensionPair DimPair;
Eigen::array<DimPair, 2> dims;
dims[0] = DimPair(2, 0);
dims[1] = DimPair(3, 1);
t_result = t_left.contract(t_right, dims);
m_result = m_left * m_right;
for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_of_complex)
{
CALL_SUBTEST(test_additions());
CALL_SUBTEST(test_abs());
CALL_SUBTEST(test_conjugate());
CALL_SUBTEST(test_contractions());
}

View File

@@ -0,0 +1,105 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_assign()
{
float data1[6];
TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3);
float data2[6];
const TensorMap<Tensor<float, 2>> mat2(data2, 2, 3);
for (int i = 0; i < 6; ++i) {
data1[i] = i;
data2[i] = -i;
}
Tensor<float, 2> rslt1;
rslt1 = mat1;
Tensor<float, 2> rslt2;
rslt2 = mat2;
Tensor<float, 2> rslt3 = mat1;
Tensor<float, 2> rslt4 = mat2;
Tensor<float, 2> rslt5(mat1);
Tensor<float, 2> rslt6(mat2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_APPROX(rslt1(i,j), static_cast<float>(i + 2*j));
VERIFY_IS_APPROX(rslt2(i,j), static_cast<float>(-i - 2*j));
VERIFY_IS_APPROX(rslt3(i,j), static_cast<float>(i + 2*j));
VERIFY_IS_APPROX(rslt4(i,j), static_cast<float>(-i - 2*j));
VERIFY_IS_APPROX(rslt5(i,j), static_cast<float>(i + 2*j));
VERIFY_IS_APPROX(rslt6(i,j), static_cast<float>(-i - 2*j));
}
}
}
static void test_plus()
{
float data1[6];
TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3);
float data2[6];
TensorMap<Tensor<float, 2>> mat2(data2, 2, 3);
for (int i = 0; i < 6; ++i) {
data1[i] = i;
data2[i] = -i;
}
Tensor<float, 2> sum1;
sum1 = mat1 + mat2;
Tensor<float, 2> sum2;
sum2 = mat2 + mat1;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_APPROX(sum1(i,j), 0.0f);
VERIFY_IS_APPROX(sum2(i,j), 0.0f);
}
}
}
static void test_plus_equal()
{
float data1[6];
TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3);
float data2[6];
TensorMap<Tensor<float, 2>> mat2(data2, 2, 3);
for (int i = 0; i < 6; ++i) {
data1[i] = i;
data2[i] = -i;
}
mat2 += mat1;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_APPROX(mat2(i,j), 0.0f);
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_of_const_values)
{
CALL_SUBTEST(test_assign());
CALL_SUBTEST(test_plus());
CALL_SUBTEST(test_plus_equal());
}

View File

@@ -0,0 +1,488 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<typename>
void test_gpu_numext() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_half(
d_res_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
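// Evaluate isnan on the float data and on the same data cast to half; the
// boolean results must agree.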
gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::half>());
Tensor<bool, 1> half_prec(num_elem);
Tensor<bool, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking numext " << i << std::endl;
VERIFY_IS_EQUAL(full_prec(i), half_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
#ifdef EIGEN_HAS_GPU_FP16
template<typename>
void test_gpu_conversion() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half(
d_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
d_conv, num_elem);
gpu_float.device(gpu_device) = gpu_float.random();
gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>();
gpu_conv.device(gpu_device) = gpu_half.cast<float>();
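// Round-trip float -> half -> float: the values must survive within the
// tolerance of VERIFY_IS_APPROX.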
Tensor<float, 1> initial(num_elem);
Tensor<float, 1> final(num_elem);
gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(initial(i), final(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_half);
gpu_device.deallocate(d_conv);
}
template<typename>
void test_gpu_unary() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
d_res_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().cast<float>();
Tensor<float, 1> half_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking unary " << i << std::endl;
VERIFY_IS_APPROX(full_prec(i), half_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_elementwise() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
d_res_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float1.device(gpu_device) = gpu_float1.random();
gpu_float2.device(gpu_device) = gpu_float2.random();
gpu_res_float.device(gpu_device) = (gpu_float1 + gpu_float2) * gpu_float1;
gpu_res_half.device(gpu_device) = ((gpu_float1.cast<Eigen::half>() + gpu_float2.cast<Eigen::half>()) * gpu_float1.cast<Eigen::half>()).cast<float>();
Tensor<float, 1> half_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise " << i << ": full prec = " << full_prec(i) << " vs half prec = " << half_prec(i) << std::endl;
VERIFY_IS_APPROX(static_cast<Eigen::half>(full_prec(i)), static_cast<Eigen::half>(half_prec(i)));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_transcendental() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res3_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res3_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(d_res1_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(d_res2_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_half(d_res3_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem);
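// Note: gpu_res4_half/gpu_res4_float alias the d_res3_* buffers, so the expm1
// results written through them overwrite the log1p results before the copy back.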
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
gpu_float3.device(gpu_device) = gpu_float3.random();
gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>();
gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>();
gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>();
gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::half>();
gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>();
gpu_res1_half.device(gpu_device) = gpu_res1_half.exp();
gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>();
gpu_res2_half.device(gpu_device) = gpu_res2_half.log();
gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p();
gpu_res4_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
gpu_res4_half.device(gpu_device) = gpu_res4_half.expm1();
Tensor<float, 1> input1(num_elem);
Tensor<Eigen::half, 1> half_prec1(num_elem);
Tensor<Eigen::half, 1> full_prec1(num_elem);
Tensor<float, 1> input2(num_elem);
Tensor<Eigen::half, 1> half_prec2(num_elem);
Tensor<Eigen::half, 1> full_prec2(num_elem);
Tensor<float, 1> input3(num_elem);
Tensor<Eigen::half, 1> half_prec3(num_elem);
Tensor<Eigen::half, 1> full_prec3(num_elem);
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl;
VERIFY_IS_APPROX(full_prec1(i), half_prec1(i));
}
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
if (std::abs(input2(i) - 1.f) < 0.05f) // log lacks accuracy near 1
VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f));
else
VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
}
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise plog1 " << i << " input = " << input3(i) << " full = " << full_prec3(i) << " half = " << half_prec3(i) << std::endl;
VERIFY_IS_APPROX(full_prec3(i), half_prec3(i));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_float3);
gpu_device.deallocate(d_res1_half);
gpu_device.deallocate(d_res1_float);
gpu_device.deallocate(d_res2_half);
gpu_device.deallocate(d_res2_float);
gpu_device.deallocate(d_res3_float);
gpu_device.deallocate(d_res3_half);
}
template<typename>
void test_gpu_contractions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int rows = 23;
int cols = 23;
int num_elem = rows*cols;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, rows, cols);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_half(
d_res_half, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_float(
d_res_float, rows, cols);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
typedef Tensor<float, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
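// DimPair(1, 0) contracts the columns of the first factor with the rows of the
// second: an ordinary 23x23 matrix product, computed once in float (then cast
// to half) and once entirely in half.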
gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>();
gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims);
Tensor<Eigen::half, 2> half_prec(rows, cols);
Tensor<Eigen::half, 2> full_prec(rows, cols);
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
std::cout << "Checking contract " << i << " " << j << full_prec(i, j) << " " << half_prec(i, j) << std::endl;
if (numext::abs(full_prec(i, j) - half_prec(i, j)) > Eigen::half(1e-2f)) {
VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
}
}
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_reductions(int size1, int size2, int redux) {
std::cout << "Reducing " << size1 << " by " << size2
<< " tensor along dim " << redux << std::endl;
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = size1*size2;
int result_size = (redux == 1 ? size1 : size2);
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size1, size2);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half(
d_res_half, result_size);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, result_size);
gpu_float.device(gpu_device) = gpu_float.random() * 2.0f;
Eigen::array<int, 1> redux_dim = {redux};
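// Sum along the requested dimension in full precision (then cast to half) and
// entirely in half precision; the two reductions are compared below.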
gpu_res_float.device(gpu_device) = gpu_float.sum(redux_dim).cast<Eigen::half>();
gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().sum(redux_dim);
Tensor<Eigen::half, 1> half_prec(result_size);
Tensor<Eigen::half, 1> full_prec(result_size);
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, result_size*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < result_size; ++i) {
std::cout << "EXPECTED " << full_prec(i) << " GOT " << half_prec(i) << std::endl;
VERIFY_IS_APPROX(full_prec(i), half_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_reductions() {
test_gpu_reductions<void>(13, 13, 0);
test_gpu_reductions<void>(13, 13, 1);
test_gpu_reductions<void>(35, 36, 0);
test_gpu_reductions<void>(35, 36, 1);
test_gpu_reductions<void>(36, 35, 0);
test_gpu_reductions<void>(36, 35, 1);
}
template<typename>
void test_gpu_full_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int size = 13;
int num_elem = size*size;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size, size);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half(
d_res_half);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float(
d_res_float);
gpu_float.device(gpu_device) = gpu_float.random();
gpu_res_float.device(gpu_device) = gpu_float.sum().cast<Eigen::half>();
gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().sum();
Tensor<Eigen::half, 0> half_prec;
Tensor<Eigen::half, 0> full_prec;
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
gpu_device.synchronize();
VERIFY_IS_APPROX(full_prec(), half_prec());
gpu_res_float.device(gpu_device) = gpu_float.maximum().cast<Eigen::half>();
gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().maximum();
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
gpu_device.synchronize();
VERIFY_IS_APPROX(full_prec(), half_prec());
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_forced_evals() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_half1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_half2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half1(
d_res_half1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2(
d_res_half2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
Eigen::array<int, 1> no_bcast;
no_bcast[0] = 1;
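// Broadcasting by a factor of 1 is an identity; combined with eval() it merely
// forces a materialized intermediate so the forced-evaluation path is exercised.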
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>();
Tensor<float, 1> half_prec1(num_elem);
Tensor<float, 1> half_prec2(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res_half1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking forced eval " << i << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl;
VERIFY_IS_APPROX(full_prec(i), half_prec1(i));
VERIFY_IS_APPROX(full_prec(i), half_prec2(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half1);
gpu_device.deallocate(d_res_half2);
gpu_device.deallocate(d_res_float);
}
#endif
EIGEN_DECLARE_TEST(cxx11_tensor_of_float16_gpu)
{
CALL_SUBTEST_1(test_gpu_numext<void>());
#ifdef EIGEN_HAS_GPU_FP16
CALL_SUBTEST_1(test_gpu_conversion<void>());
CALL_SUBTEST_1(test_gpu_unary<void>());
CALL_SUBTEST_1(test_gpu_elementwise<void>());
CALL_SUBTEST_1(test_gpu_transcendental<void>());
CALL_SUBTEST_2(test_gpu_contractions<void>());
CALL_SUBTEST_3(test_gpu_reductions<void>());
CALL_SUBTEST_4(test_gpu_full_reductions<void>());
CALL_SUBTEST_5(test_gpu_forced_evals<void>());
#else
std::cout << "Half floats are not supported by this version of gpu: skipping the test" << std::endl;
#endif
}

View File

@@ -0,0 +1,152 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::TensorMap;
static void test_assign()
{
std::string data1[6];
TensorMap<Tensor<std::string, 2>> mat1(data1, 2, 3);
std::string data2[6];
const TensorMap<Tensor<const std::string, 2>> mat2(data2, 2, 3);
for (int i = 0; i < 6; ++i) {
std::ostringstream s1;
s1 << "abc" << i*3;
data1[i] = s1.str();
std::ostringstream s2;
s2 << "def" << i*5;
data2[i] = s2.str();
}
Tensor<std::string, 2> rslt1;
rslt1 = mat1;
Tensor<std::string, 2> rslt2;
rslt2 = mat2;
Tensor<std::string, 2> rslt3 = mat1;
Tensor<std::string, 2> rslt4 = mat2;
Tensor<std::string, 2> rslt5(mat1);
Tensor<std::string, 2> rslt6(mat2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(rslt1(i,j), data1[i+2*j]);
VERIFY_IS_EQUAL(rslt2(i,j), data2[i+2*j]);
VERIFY_IS_EQUAL(rslt3(i,j), data1[i+2*j]);
VERIFY_IS_EQUAL(rslt4(i,j), data2[i+2*j]);
VERIFY_IS_EQUAL(rslt5(i,j), data1[i+2*j]);
VERIFY_IS_EQUAL(rslt6(i,j), data2[i+2*j]);
}
}
}
static void test_concat()
{
Tensor<std::string, 2> t1(2, 3);
Tensor<std::string, 2> t2(2, 3);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
std::ostringstream s1;
s1 << "abc" << i + j*2;
t1(i, j) = s1.str();
std::ostringstream s2;
s2 << "def" << i*5 + j*32;
t2(i, j) = s2.str();
}
}
Tensor<std::string, 2> result = t1.concatenate(t2, 1);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 6);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(result(i, j), t1(i, j));
VERIFY_IS_EQUAL(result(i, j+3), t2(i, j));
}
}
}
static void test_slices()
{
Tensor<std::string, 2> data(2, 6);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 6; ++j) {
std::ostringstream s1;
s1 << "abc" << i + j*2;
data(i, j) = s1.str();
}
}
const Eigen::DSizes<ptrdiff_t, 2> half_size(2, 3);
const Eigen::DSizes<ptrdiff_t, 2> first_half(0, 0);
const Eigen::DSizes<ptrdiff_t, 2> second_half(0, 3);
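// Split the 2x6 tensor into two 2x3 halves along the second dimension.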
Tensor<std::string, 2> t1 = data.slice(first_half, half_size);
Tensor<std::string, 2> t2 = data.slice(second_half, half_size);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(data(i, j), t1(i, j));
VERIFY_IS_EQUAL(data(i, j+3), t2(i, j));
}
}
}
static void test_additions()
{
Tensor<std::string, 1> data1(3);
Tensor<std::string, 1> data2(3);
for (int i = 0; i < 3; ++i) {
data1(i) = "abc";
std::ostringstream s1;
s1 << i;
data2(i) = s1.str();
}
Tensor<std::string, 1> sum = data1 + data2;
for (int i = 0; i < 3; ++i) {
std::ostringstream concat;
concat << "abc" << i;
std::string expected = concat.str();
VERIFY_IS_EQUAL(sum(i), expected);
}
}
static void test_initialization()
{
Tensor<std::string, 2> a(2, 3);
a.setConstant(std::string("foo"));
for (int i = 0; i < 2*3; ++i) {
VERIFY_IS_EQUAL(a(i), std::string("foo"));
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_of_strings)
{
// Beware: none of this is likely to ever work on a GPU.
CALL_SUBTEST(test_assign());
CALL_SUBTEST(test_concat());
CALL_SUBTEST(test_slices());
CALL_SUBTEST(test_additions());
CALL_SUBTEST(test_initialization());
}

View File

@@ -0,0 +1,93 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<int DataLayout>
static void test_simple_padding()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
paddings[0] = std::make_pair(0, 0);
paddings[1] = std::make_pair(2, 1);
paddings[2] = std::make_pair(3, 4);
paddings[3] = std::make_pair(0, 0);
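// Pad dimension 1 by (2,1) and dimension 2 by (3,4): the result is 2x6x12x7.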
Tensor<float, 4, DataLayout> padded;
padded = tensor.pad(paddings);
VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 6; ++j) {
for (int k = 0; k < 12; ++k) {
for (int l = 0; l < 7; ++l) {
if (j >= 2 && j < 5 && k >= 3 && k < 8) {
VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l));
} else {
VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
}
}
}
}
}
}
template<int DataLayout>
static void test_padded_expr()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
paddings[0] = std::make_pair(0, 0);
paddings[1] = std::make_pair(2, 1);
paddings[2] = std::make_pair(3, 4);
paddings[3] = std::make_pair(0, 0);
Eigen::DSizes<ptrdiff_t, 2> reshape_dims;
reshape_dims[0] = 12;
reshape_dims[1] = 84;
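// 12 = 2*6 and 84 = 12*7, so the padded 2x6x12x7 tensor is flattened to 2-D.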
Tensor<float, 2, DataLayout> result;
result = tensor.pad(paddings).reshape(reshape_dims);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 6; ++j) {
for (int k = 0; k < 12; ++k) {
for (int l = 0; l < 7; ++l) {
const float result_value = DataLayout == ColMajor ?
result(i+2*j,k+12*l) : result(j+6*i,l+7*k);
if (j >= 2 && j < 5 && k >= 3 && k < 8) {
VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l));
} else {
VERIFY_IS_EQUAL(result_value, 0.0f);
}
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_padding)
{
CALL_SUBTEST(test_simple_padding<ColMajor>());
CALL_SUBTEST(test_simple_padding<RowMajor>());
CALL_SUBTEST(test_padded_expr<ColMajor>());
CALL_SUBTEST(test_padded_expr<RowMajor>());
}

View File

@@ -0,0 +1,157 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
template<typename DataType, int DataLayout, typename IndexType>
static void test_simple_padding(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
tensor.setRandom();
array<std::pair<IndexType, IndexType>, 4> paddings;
paddings[0] = std::make_pair(0, 0);
paddings[1] = std::make_pair(2, 1);
paddings[2] = std::make_pair(3, 4);
paddings[3] = std::make_pair(0, 0);
IndexType paddedSizeDim1 = 2;
IndexType paddedSizeDim2 = 6;
IndexType paddedSizeDim3 = 12;
IndexType paddedSizeDim4 = 7;
array<IndexType, 4> paddedTensorRange = {{paddedSizeDim1, paddedSizeDim2, paddedSizeDim3, paddedSizeDim4}};
Tensor<DataType, 4, DataLayout, IndexType> padded(paddedTensorRange);
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(padded.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu2(gpu_data2, paddedTensorRange);
VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
gpu2.device(sycl_device)=gpu1.pad(paddings);
sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2,(padded.size())*sizeof(DataType));
for (IndexType i = 0; i < paddedSizeDim1; ++i) {
for (IndexType j = 0; j < paddedSizeDim2; ++j) {
for (IndexType k = 0; k < paddedSizeDim3; ++k) {
for (IndexType l = 0; l < paddedSizeDim4; ++l) {
if (j >= 2 && j < 5 && k >= 3 && k < 8) {
VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l));
} else {
VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
}
}
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
}
template<typename DataType, int DataLayout, typename IndexType>
static void test_padded_expr(const Eigen::SyclDevice& sycl_device)
{
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
tensor.setRandom();
array<std::pair<IndexType, IndexType>, 4> paddings;
paddings[0] = std::make_pair(0, 0);
paddings[1] = std::make_pair(2, 1);
paddings[2] = std::make_pair(3, 4);
paddings[3] = std::make_pair(0, 0);
Eigen::DSizes<IndexType, 2> reshape_dims;
reshape_dims[0] = 12;
reshape_dims[1] = 84;
Tensor<DataType, 2, DataLayout, IndexType> result(reshape_dims);
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(result.size()*sizeof(DataType)));
TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, reshape_dims);
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
gpu2.device(sycl_device)=gpu1.pad(paddings).reshape(reshape_dims);
sycl_device.memcpyDeviceToHost(result.data(), gpu_data2,(result.size())*sizeof(DataType));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 6; ++j) {
for (IndexType k = 0; k < 12; ++k) {
for (IndexType l = 0; l < 7; ++l) {
const DataType result_value = DataLayout == ColMajor ?
result(i+2*j,k+12*l) : result(j+6*i,l+7*k);
if (j >= 2 && j < 5 && k >= 3 && k < 8) {
VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l));
} else {
VERIFY_IS_EQUAL(result_value, 0.0f);
}
}
}
}
}
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
}
template<typename DataType, typename dev_Selector> void sycl_padding_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_padding<DataType, RowMajor, int64_t>(sycl_device);
test_simple_padding<DataType, ColMajor, int64_t>(sycl_device);
test_padded_expr<DataType, RowMajor, int64_t>(sycl_device);
test_padded_expr<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_padding_sycl)
{
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_padding_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,172 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<int DataLayout>
static void test_simple_patch()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> patch_dims;
patch_dims[0] = 1;
patch_dims[1] = 1;
patch_dims[2] = 1;
patch_dims[3] = 1;
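// 1x1x1x1 patches: every element becomes its own patch, so the output is just
// a reshaped copy of the input.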
Tensor<float, 5, DataLayout> no_patch;
no_patch = tensor.extract_patches(patch_dims);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(no_patch.dimension(0), 1);
VERIFY_IS_EQUAL(no_patch.dimension(1), 1);
VERIFY_IS_EQUAL(no_patch.dimension(2), 1);
VERIFY_IS_EQUAL(no_patch.dimension(3), 1);
VERIFY_IS_EQUAL(no_patch.dimension(4), tensor.size());
} else {
VERIFY_IS_EQUAL(no_patch.dimension(0), tensor.size());
VERIFY_IS_EQUAL(no_patch.dimension(1), 1);
VERIFY_IS_EQUAL(no_patch.dimension(2), 1);
VERIFY_IS_EQUAL(no_patch.dimension(3), 1);
VERIFY_IS_EQUAL(no_patch.dimension(4), 1);
}
for (int i = 0; i < tensor.size(); ++i) {
VERIFY_IS_EQUAL(tensor.data()[i], no_patch.data()[i]);
}
patch_dims[0] = 2;
patch_dims[1] = 3;
patch_dims[2] = 5;
patch_dims[3] = 7;
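// A patch spanning the full tensor extent yields exactly one patch.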
Tensor<float, 5, DataLayout> single_patch;
single_patch = tensor.extract_patches(patch_dims);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(single_patch.dimension(0), 2);
VERIFY_IS_EQUAL(single_patch.dimension(1), 3);
VERIFY_IS_EQUAL(single_patch.dimension(2), 5);
VERIFY_IS_EQUAL(single_patch.dimension(3), 7);
VERIFY_IS_EQUAL(single_patch.dimension(4), 1);
} else {
VERIFY_IS_EQUAL(single_patch.dimension(0), 1);
VERIFY_IS_EQUAL(single_patch.dimension(1), 2);
VERIFY_IS_EQUAL(single_patch.dimension(2), 3);
VERIFY_IS_EQUAL(single_patch.dimension(3), 5);
VERIFY_IS_EQUAL(single_patch.dimension(4), 7);
}
for (int i = 0; i < tensor.size(); ++i) {
VERIFY_IS_EQUAL(tensor.data()[i], single_patch.data()[i]);
}
patch_dims[0] = 1;
patch_dims[1] = 2;
patch_dims[2] = 2;
patch_dims[3] = 1;
Tensor<float, 5, DataLayout> twod_patch;
twod_patch = tensor.extract_patches(patch_dims);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(twod_patch.dimension(0), 1);
VERIFY_IS_EQUAL(twod_patch.dimension(1), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(3), 1);
VERIFY_IS_EQUAL(twod_patch.dimension(4), 2*2*4*7);
} else {
VERIFY_IS_EQUAL(twod_patch.dimension(0), 2*2*4*7);
VERIFY_IS_EQUAL(twod_patch.dimension(1), 1);
VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(3), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(4), 1);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 7; ++l) {
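// Linear index of this patch among the 2*2*4*7 extracted positions, enumerated
// in layout order.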
int patch_loc;
if (DataLayout == ColMajor) {
patch_loc = i + 2 * (j + 2 * (k + 4 * l));
} else {
patch_loc = l + 7 * (k + 4 * (j + 2 * i));
}
for (int x = 0; x < 2; ++x) {
for (int y = 0; y < 2; ++y) {
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(0,x,y,0,patch_loc));
} else {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(patch_loc,0,x,y,0));
}
}
}
}
}
}
}
patch_dims[0] = 1;
patch_dims[1] = 2;
patch_dims[2] = 3;
patch_dims[3] = 5;
Tensor<float, 5, DataLayout> threed_patch;
threed_patch = tensor.extract_patches(patch_dims);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(threed_patch.dimension(0), 1);
VERIFY_IS_EQUAL(threed_patch.dimension(1), 2);
VERIFY_IS_EQUAL(threed_patch.dimension(2), 3);
VERIFY_IS_EQUAL(threed_patch.dimension(3), 5);
VERIFY_IS_EQUAL(threed_patch.dimension(4), 2*2*3*3);
} else {
VERIFY_IS_EQUAL(threed_patch.dimension(0), 2*2*3*3);
VERIFY_IS_EQUAL(threed_patch.dimension(1), 1);
VERIFY_IS_EQUAL(threed_patch.dimension(2), 2);
VERIFY_IS_EQUAL(threed_patch.dimension(3), 3);
VERIFY_IS_EQUAL(threed_patch.dimension(4), 5);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 3; ++l) {
int patch_loc;
if (DataLayout == ColMajor) {
patch_loc = i + 2 * (j + 2 * (k + 3 * l));
} else {
patch_loc = l + 3 * (k + 3 * (j + 2 * i));
}
for (int x = 0; x < 2; ++x) {
for (int y = 0; y < 3; ++y) {
for (int z = 0; z < 5; ++z) {
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(0,x,y,z,patch_loc));
} else {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(patch_loc,0,x,y,z));
}
}
}
}
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_patch)
{
CALL_SUBTEST(test_simple_patch<ColMajor>());
CALL_SUBTEST(test_simple_patch<RowMajor>());
// CALL_SUBTEST(test_expr_shuffling());
}

View File

@@ -0,0 +1,249 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_patch_sycl(const Eigen::SyclDevice& sycl_device){
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
array<IndexType, 5> patchTensorRange;
if (DataLayout == ColMajor) {
patchTensorRange = {{1, 1, 1, 1, sizeDim1*sizeDim2*sizeDim3*sizeDim4}};
}else{
patchTensorRange = {{sizeDim1*sizeDim2*sizeDim3*sizeDim4,1, 1, 1, 1}};
}
Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange);
Tensor<DataType, 5, DataLayout,IndexType> no_patch(patchTensorRange);
tensor.setRandom();
array<ptrdiff_t, 4> patch_dims;
patch_dims[0] = 1;
patch_dims[1] = 1;
patch_dims[2] = 1;
patch_dims[3] = 1;
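// As in the CPU patch test: 1x1x1x1 patches reproduce the input, one element
// per patch.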
const size_t tensorBuffSize = tensor.size()*sizeof(DataType);
size_t patchTensorBuffSize = no_patch.size()*sizeof(DataType);
DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_no_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_no_patch(gpu_data_no_patch, patchTensorRange);
sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
gpu_no_patch.device(sycl_device)=gpu_tensor.extract_patches(patch_dims);
sycl_device.memcpyDeviceToHost(no_patch.data(), gpu_data_no_patch, patchTensorBuffSize);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(no_patch.dimension(0), 1);
VERIFY_IS_EQUAL(no_patch.dimension(1), 1);
VERIFY_IS_EQUAL(no_patch.dimension(2), 1);
VERIFY_IS_EQUAL(no_patch.dimension(3), 1);
VERIFY_IS_EQUAL(no_patch.dimension(4), tensor.size());
} else {
VERIFY_IS_EQUAL(no_patch.dimension(0), tensor.size());
VERIFY_IS_EQUAL(no_patch.dimension(1), 1);
VERIFY_IS_EQUAL(no_patch.dimension(2), 1);
VERIFY_IS_EQUAL(no_patch.dimension(3), 1);
VERIFY_IS_EQUAL(no_patch.dimension(4), 1);
}
for (int i = 0; i < tensor.size(); ++i) {
VERIFY_IS_EQUAL(tensor.data()[i], no_patch.data()[i]);
}
patch_dims[0] = 2;
patch_dims[1] = 3;
patch_dims[2] = 5;
patch_dims[3] = 7;
if (DataLayout == ColMajor) {
patchTensorRange = {{sizeDim1,sizeDim2,sizeDim3,sizeDim4,1}};
  } else {
patchTensorRange = {{1,sizeDim1,sizeDim2,sizeDim3,sizeDim4}};
}
Tensor<DataType, 5, DataLayout,IndexType> single_patch(patchTensorRange);
patchTensorBuffSize =single_patch.size()*sizeof(DataType);
DataType* gpu_data_single_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_single_patch(gpu_data_single_patch, patchTensorRange);
gpu_single_patch.device(sycl_device)=gpu_tensor.extract_patches(patch_dims);
sycl_device.memcpyDeviceToHost(single_patch.data(), gpu_data_single_patch, patchTensorBuffSize);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(single_patch.dimension(0), 2);
VERIFY_IS_EQUAL(single_patch.dimension(1), 3);
VERIFY_IS_EQUAL(single_patch.dimension(2), 5);
VERIFY_IS_EQUAL(single_patch.dimension(3), 7);
VERIFY_IS_EQUAL(single_patch.dimension(4), 1);
} else {
VERIFY_IS_EQUAL(single_patch.dimension(0), 1);
VERIFY_IS_EQUAL(single_patch.dimension(1), 2);
VERIFY_IS_EQUAL(single_patch.dimension(2), 3);
VERIFY_IS_EQUAL(single_patch.dimension(3), 5);
VERIFY_IS_EQUAL(single_patch.dimension(4), 7);
}
for (int i = 0; i < tensor.size(); ++i) {
VERIFY_IS_EQUAL(tensor.data()[i], single_patch.data()[i]);
}
patch_dims[0] = 1;
patch_dims[1] = 2;
patch_dims[2] = 2;
patch_dims[3] = 1;
if (DataLayout == ColMajor) {
patchTensorRange = {{1,2,2,1,2*2*4*7}};
  } else {
patchTensorRange = {{2*2*4*7, 1, 2,2,1}};
}
Tensor<DataType, 5, DataLayout,IndexType> twod_patch(patchTensorRange);
patchTensorBuffSize =twod_patch.size()*sizeof(DataType);
DataType* gpu_data_twod_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_twod_patch(gpu_data_twod_patch, patchTensorRange);
gpu_twod_patch.device(sycl_device)=gpu_tensor.extract_patches(patch_dims);
sycl_device.memcpyDeviceToHost(twod_patch.data(), gpu_data_twod_patch, patchTensorBuffSize);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(twod_patch.dimension(0), 1);
VERIFY_IS_EQUAL(twod_patch.dimension(1), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(3), 1);
VERIFY_IS_EQUAL(twod_patch.dimension(4), 2*2*4*7);
} else {
VERIFY_IS_EQUAL(twod_patch.dimension(0), 2*2*4*7);
VERIFY_IS_EQUAL(twod_patch.dimension(1), 1);
VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(3), 2);
VERIFY_IS_EQUAL(twod_patch.dimension(4), 1);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 7; ++l) {
int patch_loc;
if (DataLayout == ColMajor) {
patch_loc = i + 2 * (j + 2 * (k + 4 * l));
} else {
patch_loc = l + 7 * (k + 4 * (j + 2 * i));
}
for (int x = 0; x < 2; ++x) {
for (int y = 0; y < 2; ++y) {
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(0,x,y,0,patch_loc));
} else {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(patch_loc,0,x,y,0));
}
}
}
}
}
}
}
patch_dims[0] = 1;
patch_dims[1] = 2;
patch_dims[2] = 3;
patch_dims[3] = 5;
if (DataLayout == ColMajor) {
patchTensorRange = {{1,2,3,5,2*2*3*3}};
  } else {
patchTensorRange = {{2*2*3*3, 1, 2,3,5}};
}
Tensor<DataType, 5, DataLayout,IndexType> threed_patch(patchTensorRange);
patchTensorBuffSize =threed_patch.size()*sizeof(DataType);
DataType* gpu_data_threed_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_threed_patch(gpu_data_threed_patch, patchTensorRange);
gpu_threed_patch.device(sycl_device)=gpu_tensor.extract_patches(patch_dims);
sycl_device.memcpyDeviceToHost(threed_patch.data(), gpu_data_threed_patch, patchTensorBuffSize);
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(threed_patch.dimension(0), 1);
VERIFY_IS_EQUAL(threed_patch.dimension(1), 2);
VERIFY_IS_EQUAL(threed_patch.dimension(2), 3);
VERIFY_IS_EQUAL(threed_patch.dimension(3), 5);
VERIFY_IS_EQUAL(threed_patch.dimension(4), 2*2*3*3);
} else {
VERIFY_IS_EQUAL(threed_patch.dimension(0), 2*2*3*3);
VERIFY_IS_EQUAL(threed_patch.dimension(1), 1);
VERIFY_IS_EQUAL(threed_patch.dimension(2), 2);
VERIFY_IS_EQUAL(threed_patch.dimension(3), 3);
VERIFY_IS_EQUAL(threed_patch.dimension(4), 5);
}
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 3; ++l) {
int patch_loc;
if (DataLayout == ColMajor) {
patch_loc = i + 2 * (j + 2 * (k + 3 * l));
} else {
patch_loc = l + 3 * (k + 3 * (j + 2 * i));
}
for (int x = 0; x < 2; ++x) {
for (int y = 0; y < 3; ++y) {
for (int z = 0; z < 5; ++z) {
if (DataLayout == ColMajor) {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(0,x,y,z,patch_loc));
} else {
VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(patch_loc,0,x,y,z));
}
}
}
}
}
}
}
}
sycl_device.deallocate(gpu_data_tensor);
sycl_device.deallocate(gpu_data_no_patch);
sycl_device.deallocate(gpu_data_single_patch);
sycl_device.deallocate(gpu_data_twod_patch);
sycl_device.deallocate(gpu_data_threed_patch);
}
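// The test above repeats one pattern for every patch configuration: allocate
// device memory, upload the input, evaluate the expression on the device,
// download the result, verify on the host, and free the buffers. A minimal
// sketch of that round trip, stripped of the patch-specific checks (helper
// name and the doubling expression are illustrative only):
template <typename DataType, int DataLayout, typename IndexType>
static void device_roundtrip_sketch(const Eigen::SyclDevice& sycl_device) {
  array<IndexType, 2> range = {{4, 4}};
  Tensor<DataType, 2, DataLayout, IndexType> in(range);
  Tensor<DataType, 2, DataLayout, IndexType> out(range);
  in.setRandom();
  const size_t bytes = in.size() * sizeof(DataType);
  DataType* d_in  = static_cast<DataType*>(sycl_device.allocate(bytes));
  DataType* d_out = static_cast<DataType*>(sycl_device.allocate(bytes));
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> gpu_in(d_in, range);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(d_out, range);
  sycl_device.memcpyHostToDevice(d_in, in.data(), bytes);
  gpu_out.device(sycl_device) = gpu_in + gpu_in;  // stand-in for extract_patches
  sycl_device.memcpyDeviceToHost(out.data(), d_out, bytes);
  for (IndexType i = 0; i < in.size(); ++i) {
    VERIFY_IS_EQUAL(out.data()[i], in.data()[i] + in.data()[i]);
  }
  sycl_device.deallocate(d_in);
  sycl_device.deallocate(d_out);
}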
template<typename DataType, typename dev_Selector> void sycl_tensor_patch_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_patch_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_simple_patch_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_patch_sycl)
{
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_tensor_patch_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,86 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
template<typename Scalar>
static void test_default()
{
Tensor<Scalar, 1> vec(6);
vec.setRandom();
// Fixme: we should check that the generated numbers follow a uniform
// distribution instead.
for (int i = 1; i < 6; ++i) {
VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
}
}
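// A hedged sketch of what the Fixme above asks for: bucket a larger sample
// into a coarse histogram and require every bucket to hold roughly its fair
// share. The sample size and slack factors below are illustrative assumptions,
// not a calibrated statistical test; it also assumes setRandom() fills
// floating point tensors with values in [0, 1).
template <typename Scalar>
static void uniformity_check_sketch()
{
  const int n = 100000;
  const int buckets = 10;
  Tensor<Scalar, 1> vec(n);
  vec.setRandom();
  int counts[buckets] = {0};
  for (int i = 0; i < n; ++i) {
    const int b = static_cast<int>(static_cast<float>(vec(i)) * buckets);
    if (b >= 0 && b < buckets) ++counts[b];
  }
  for (int b = 0; b < buckets; ++b) {
    // Expect about n / buckets entries per bucket; allow generous slack.
    VERIFY(counts[b] > n / (2 * buckets));
    VERIFY(counts[b] < 2 * n / buckets);
  }
}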
template<typename Scalar>
static void test_normal()
{
Tensor<Scalar, 1> vec(6);
vec.template setRandom<Eigen::internal::NormalRandomGenerator<Scalar>>();
// Fixme: we should check that the generated numbers follow a gaussian
// distribution instead.
for (int i = 1; i < 6; ++i) {
VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
}
}
struct MyGenerator {
MyGenerator() { }
MyGenerator(const MyGenerator&) { }
// Return a random value to be used. "element_location" is the
// location of the entry to set in the tensor, it can typically
// be ignored.
int operator()(Eigen::DenseIndex element_location, Eigen::DenseIndex /*unused*/ = 0) const {
return static_cast<int>(3 * element_location);
}
// Same as above but generates several numbers at a time.
internal::packet_traits<int>::type packetOp(
Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
const int packetSize = internal::packet_traits<int>::size;
EIGEN_ALIGN_MAX int values[packetSize];
for (int i = 0; i < packetSize; ++i) {
values[i] = static_cast<int>(3 * (packet_location + i));
}
return internal::pload<typename internal::packet_traits<int>::type>(values);
}
};
static void test_custom()
{
Tensor<int, 1> vec(6);
vec.setRandom<MyGenerator>();
for (int i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(vec(i), 3*i);
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_random)
{
CALL_SUBTEST((test_default<float>()));
CALL_SUBTEST((test_normal<float>()));
CALL_SUBTEST((test_default<double>()));
CALL_SUBTEST((test_normal<double>()));
CALL_SUBTEST((test_default<Eigen::half>()));
CALL_SUBTEST((test_normal<Eigen::half>()));
CALL_SUBTEST((test_default<Eigen::bfloat16>()));
CALL_SUBTEST((test_normal<Eigen::bfloat16>()));
CALL_SUBTEST(test_custom());
}

View File

@@ -0,0 +1,86 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <Eigen/CXX11/Tensor>
#include <Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
void test_gpu_random_uniform()
{
Tensor<float, 2> out(72,97);
out.setZero();
std::size_t out_bytes = out.size() * sizeof(float);
float* d_out;
gpuMalloc((void**)(&d_out), out_bytes);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
gpu_out.device(gpu_device) = gpu_out.random();
assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
// For now we just check this code doesn't crash.
// TODO: come up with a valid test of randomness
}
void test_gpu_random_normal()
{
Tensor<float, 2> out(72,97);
out.setZero();
std::size_t out_bytes = out.size() * sizeof(float);
float* d_out;
gpuMalloc((void**)(&d_out), out_bytes);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
Eigen::internal::NormalRandomGenerator<float> gen(true);
gpu_out.device(gpu_device) = gpu_out.random(gen);
assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
}
static void test_complex()
{
Tensor<std::complex<float>, 1> vec(6);
vec.setRandom();
// Fixme: we should check that the generated numbers follow a uniform
// distribution instead.
for (int i = 1; i < 6; ++i) {
VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_random_gpu)
{
CALL_SUBTEST(test_gpu_random_uniform());
CALL_SUBTEST(test_gpu_random_normal());
CALL_SUBTEST(test_complex());
}

View File

@@ -0,0 +1,100 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
template <typename DataType, int DataLayout, typename IndexType>
static void test_sycl_random_uniform(const Eigen::SyclDevice& sycl_device)
{
Tensor<DataType, 2,DataLayout, IndexType> out(72,97);
out.setZero();
std::size_t out_bytes = out.size() * sizeof(DataType);
IndexType sizeDim0 = 72;
IndexType sizeDim1 = 97;
array<IndexType, 2> tensorRange = {{sizeDim0, sizeDim1}};
DataType* d_out = static_cast<DataType*>(sycl_device.allocate(out_bytes));
TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(d_out, tensorRange);
gpu_out.device(sycl_device)=gpu_out.random();
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
    VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1));
  }
  // For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}
template <typename DataType, int DataLayout, typename IndexType>
void test_sycl_random_normal(const Eigen::SyclDevice& sycl_device)
{
Tensor<DataType, 2,DataLayout,IndexType> out(72,97);
out.setZero();
std::size_t out_bytes = out.size() * sizeof(DataType);
IndexType sizeDim0 = 72;
IndexType sizeDim1 = 97;
array<IndexType, 2> tensorRange = {{sizeDim0, sizeDim1}};
DataType* d_out = static_cast<DataType*>(sycl_device.allocate(out_bytes));
TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(d_out, tensorRange);
Eigen::internal::NormalRandomGenerator<DataType> gen(true);
gpu_out.device(sycl_device)=gpu_out.random(gen);
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1));
}
  // For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}
template<typename DataType, typename dev_Selector> void sycl_random_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_sycl_random_uniform<DataType, RowMajor, int64_t>(sycl_device);
test_sycl_random_uniform<DataType, ColMajor, int64_t>(sycl_device);
test_sycl_random_normal<DataType, RowMajor, int64_t>(sycl_device);
test_sycl_random_normal<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_random_sycl)
{
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_random_test_per_device<float>(device));
#ifdef EIGEN_SYCL_DOUBLE_SUPPORT
CALL_SUBTEST(sycl_random_test_per_device<double>(device));
#endif
}
}

View File

@@ -0,0 +1,532 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <limits>
#include <numeric>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <int DataLayout>
static void test_trivial_reductions() {
{
Tensor<float, 0, DataLayout> tensor;
tensor.setRandom();
array<ptrdiff_t, 0> reduction_axis;
Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis);
VERIFY_IS_EQUAL(result(), tensor());
}
{
Tensor<float, 1, DataLayout> tensor(7);
tensor.setRandom();
array<ptrdiff_t, 0> reduction_axis;
Tensor<float, 1, DataLayout> result = tensor.sum(reduction_axis);
VERIFY_IS_EQUAL(result.dimension(0), 7);
for (int i = 0; i < 7; ++i) {
VERIFY_IS_EQUAL(result(i), tensor(i));
}
}
{
Tensor<float, 2, DataLayout> tensor(2, 3);
tensor.setRandom();
array<ptrdiff_t, 0> reduction_axis;
Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 3);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(result(i, j), tensor(i, j));
}
}
}
}
template <typename Scalar,int DataLayout>
static void test_simple_reductions() {
Tensor<Scalar, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
// Add a little offset so that the product reductions won't be close to zero.
tensor += tensor.constant(Scalar(0.5f));
array<ptrdiff_t, 2> reduction_axis2;
reduction_axis2[0] = 1;
reduction_axis2[1] = 3;
Tensor<Scalar, 2, DataLayout> result = tensor.sum(reduction_axis2);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 5);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 5; ++j) {
Scalar sum = Scalar(0.0f);
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 7; ++l) {
sum += tensor(i, k, j, l);
}
}
VERIFY_IS_APPROX(result(i, j), sum);
}
}
{
Tensor<Scalar, 0, DataLayout> sum1 = tensor.sum();
VERIFY_IS_EQUAL(sum1.rank(), 0);
array<ptrdiff_t, 4> reduction_axis4;
reduction_axis4[0] = 0;
reduction_axis4[1] = 1;
reduction_axis4[2] = 2;
reduction_axis4[3] = 3;
Tensor<Scalar, 0, DataLayout> sum2 = tensor.sum(reduction_axis4);
VERIFY_IS_EQUAL(sum2.rank(), 0);
VERIFY_IS_APPROX(sum1(), sum2());
}
reduction_axis2[0] = 0;
reduction_axis2[1] = 2;
result = tensor.prod(reduction_axis2);
VERIFY_IS_EQUAL(result.dimension(0), 3);
VERIFY_IS_EQUAL(result.dimension(1), 7);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 7; ++j) {
Scalar prod = Scalar(1.0f);
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 5; ++l) {
prod *= tensor(k, i, l, j);
}
}
VERIFY_IS_APPROX(result(i, j), prod);
}
}
{
Tensor<Scalar, 0, DataLayout> prod1 = tensor.prod();
VERIFY_IS_EQUAL(prod1.rank(), 0);
array<ptrdiff_t, 4> reduction_axis4;
reduction_axis4[0] = 0;
reduction_axis4[1] = 1;
reduction_axis4[2] = 2;
reduction_axis4[3] = 3;
Tensor<Scalar, 0, DataLayout> prod2 = tensor.prod(reduction_axis4);
VERIFY_IS_EQUAL(prod2.rank(), 0);
VERIFY_IS_APPROX(prod1(), prod2());
}
reduction_axis2[0] = 0;
reduction_axis2[1] = 2;
result = tensor.maximum(reduction_axis2);
VERIFY_IS_EQUAL(result.dimension(0), 3);
VERIFY_IS_EQUAL(result.dimension(1), 7);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 7; ++j) {
Scalar max_val = std::numeric_limits<Scalar>::lowest();
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 5; ++l) {
max_val = (std::max)(max_val, tensor(k, i, l, j));
}
}
VERIFY_IS_APPROX(result(i, j), max_val);
}
}
{
Tensor<Scalar, 0, DataLayout> max1 = tensor.maximum();
VERIFY_IS_EQUAL(max1.rank(), 0);
array<ptrdiff_t, 4> reduction_axis4;
reduction_axis4[0] = 0;
reduction_axis4[1] = 1;
reduction_axis4[2] = 2;
reduction_axis4[3] = 3;
Tensor<Scalar, 0, DataLayout> max2 = tensor.maximum(reduction_axis4);
VERIFY_IS_EQUAL(max2.rank(), 0);
VERIFY_IS_APPROX(max1(), max2());
}
reduction_axis2[0] = 0;
reduction_axis2[1] = 1;
result = tensor.minimum(reduction_axis2);
VERIFY_IS_EQUAL(result.dimension(0), 5);
VERIFY_IS_EQUAL(result.dimension(1), 7);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 7; ++j) {
Scalar min_val = (std::numeric_limits<Scalar>::max)();
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 3; ++l) {
min_val = (std::min)(min_val, tensor(k, l, i, j));
}
}
VERIFY_IS_APPROX(result(i, j), min_val);
}
}
{
Tensor<Scalar, 0, DataLayout> min1 = tensor.minimum();
VERIFY_IS_EQUAL(min1.rank(), 0);
array<ptrdiff_t, 4> reduction_axis4;
reduction_axis4[0] = 0;
reduction_axis4[1] = 1;
reduction_axis4[2] = 2;
reduction_axis4[3] = 3;
Tensor<Scalar, 0, DataLayout> min2 = tensor.minimum(reduction_axis4);
VERIFY_IS_EQUAL(min2.rank(), 0);
VERIFY_IS_APPROX(min1(), min2());
}
reduction_axis2[0] = 0;
reduction_axis2[1] = 1;
result = tensor.mean(reduction_axis2);
VERIFY_IS_EQUAL(result.dimension(0), 5);
VERIFY_IS_EQUAL(result.dimension(1), 7);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 7; ++j) {
Scalar sum = Scalar(0.0f);
int count = 0;
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 3; ++l) {
sum += tensor(k, l, i, j);
++count;
}
}
VERIFY_IS_APPROX(result(i, j), sum / Scalar(count));
}
}
{
Tensor<Scalar, 0, DataLayout> mean1 = tensor.mean();
VERIFY_IS_EQUAL(mean1.rank(), 0);
array<ptrdiff_t, 4> reduction_axis4;
reduction_axis4[0] = 0;
reduction_axis4[1] = 1;
reduction_axis4[2] = 2;
reduction_axis4[3] = 3;
Tensor<Scalar, 0, DataLayout> mean2 = tensor.mean(reduction_axis4);
VERIFY_IS_EQUAL(mean2.rank(), 0);
VERIFY_IS_APPROX(mean1(), mean2());
}
{
Tensor<int, 1> ints(10);
std::iota(ints.data(), ints.data() + ints.dimension(0), 0);
TensorFixedSize<bool, Sizes<> > all_;
all_ = ints.all();
VERIFY(!all_());
all_ = (ints >= ints.constant(0)).all();
VERIFY(all_());
TensorFixedSize<bool, Sizes<> > any;
any = (ints > ints.constant(10)).any();
VERIFY(!any());
any = (ints < ints.constant(1)).any();
VERIFY(any());
}
}
template <int DataLayout>
static void test_reductions_in_expr() {
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
array<ptrdiff_t, 2> reduction_axis2;
reduction_axis2[0] = 1;
reduction_axis2[1] = 3;
Tensor<float, 2, DataLayout> result(2, 5);
result = result.constant(1.0f) - tensor.sum(reduction_axis2);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 5);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 5; ++j) {
float sum = 0.0f;
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 7; ++l) {
sum += tensor(i, k, j, l);
}
}
VERIFY_IS_APPROX(result(i, j), 1.0f - sum);
}
}
}
template <int DataLayout>
static void test_full_reductions() {
Tensor<float, 2, DataLayout> tensor(2, 3);
tensor.setRandom();
array<ptrdiff_t, 2> reduction_axis;
reduction_axis[0] = 0;
reduction_axis[1] = 1;
Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis);
VERIFY_IS_EQUAL(result.rank(), 0);
float sum = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
sum += tensor(i, j);
}
}
VERIFY_IS_APPROX(result(0), sum);
result = tensor.square().sum(reduction_axis).sqrt();
VERIFY_IS_EQUAL(result.rank(), 0);
sum = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
sum += tensor(i, j) * tensor(i, j);
}
}
VERIFY_IS_APPROX(result(), sqrtf(sum));
}
struct UserReducer {
static const bool PacketAccess = false;
UserReducer(float offset) : offset_(offset) {}
void reduce(const float val, float* accum) { *accum += val * val; }
float initialize() const { return 0; }
float finalize(const float accum) const { return 1.0f / (accum + offset_); }
private:
const float offset_;
};
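// For reference, the reducer contract consumed by Tensor::reduce() above is:
// initialize() seeds the accumulator, reduce() folds in one input value, and
// finalize() maps the accumulator to the output. A second, hedged sketch (a
// hypothetical sum-of-absolute-values reducer, not part of Eigen):
struct AbsSumReducer {
  static const bool PacketAccess = false;
  void reduce(const float val, float* accum) { *accum += numext::abs(val); }
  float initialize() const { return 0.0f; }
  float finalize(const float accum) const { return accum; }
};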
template <int DataLayout>
static void test_user_defined_reductions() {
Tensor<float, 2, DataLayout> tensor(5, 7);
tensor.setRandom();
array<ptrdiff_t, 1> reduction_axis;
reduction_axis[0] = 1;
UserReducer reducer(10.0f);
Tensor<float, 1, DataLayout> result = tensor.reduce(reduction_axis, reducer);
VERIFY_IS_EQUAL(result.dimension(0), 5);
for (int i = 0; i < 5; ++i) {
float expected = 10.0f;
for (int j = 0; j < 7; ++j) {
expected += tensor(i, j) * tensor(i, j);
}
expected = 1.0f / expected;
VERIFY_IS_APPROX(result(i), expected);
}
}
template <int DataLayout>
static void test_tensor_maps() {
int inputs[2 * 3 * 5 * 7];
TensorMap<Tensor<int, 4, DataLayout> > tensor_map(inputs, 2, 3, 5, 7);
TensorMap<Tensor<const int, 4, DataLayout> > tensor_map_const(inputs, 2, 3, 5,
7);
const TensorMap<Tensor<const int, 4, DataLayout> > tensor_map_const_const(
inputs, 2, 3, 5, 7);
tensor_map.setRandom();
array<ptrdiff_t, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
Tensor<int, 2, DataLayout> result = tensor_map.sum(reduction_axis);
Tensor<int, 2, DataLayout> result2 = tensor_map_const.sum(reduction_axis);
Tensor<int, 2, DataLayout> result3 =
tensor_map_const_const.sum(reduction_axis);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 5; ++j) {
int sum = 0;
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 7; ++l) {
sum += tensor_map(i, k, j, l);
}
}
VERIFY_IS_EQUAL(result(i, j), sum);
VERIFY_IS_EQUAL(result2(i, j), sum);
VERIFY_IS_EQUAL(result3(i, j), sum);
}
}
}
template <int DataLayout>
static void test_static_dims() {
Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
Tensor<float, 2, DataLayout> out(72, 97);
in.setRandom();
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
#else
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<3> > reduction_axis;
#endif
out = in.maximum(reduction_axis);
for (int i = 0; i < 72; ++i) {
for (int j = 0; j < 97; ++j) {
float expected = -1e10f;
for (int k = 0; k < 53; ++k) {
for (int l = 0; l < 113; ++l) {
expected = (std::max)(expected, in(i, k, j, l));
}
}
VERIFY_IS_EQUAL(out(i, j), expected);
}
}
}
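// Both axis spellings above yield the same values; the IndexList form just
// moves the axes into the type so Eigen can select packetized kernels at
// compile time. A minimal side-by-side sketch (guarded like the test above,
// helper name illustrative):
static void axis_spelling_sketch() {
#if EIGEN_HAS_CONSTEXPR
  Tensor<float, 3> t(4, 5, 6);
  t.setRandom();
  array<int, 1> runtime_axis;
  runtime_axis[0] = 1;
  Eigen::IndexList<Eigen::type2index<1> > static_axis;
  Tensor<float, 2> a = t.sum(runtime_axis);
  Tensor<float, 2> b = t.sum(static_axis);
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 6; ++j) {
      VERIFY_IS_APPROX(a(i, j), b(i, j));
    }
  }
#endif
}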
template <int DataLayout>
static void test_innermost_last_dims() {
Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
Tensor<float, 2, DataLayout> out(97, 113);
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 0;
reduction_axis[1] = 1;
#else
// This triggers the use of packets for ColMajor.
Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1> > reduction_axis;
#endif
out = in.maximum(reduction_axis);
for (int i = 0; i < 97; ++i) {
for (int j = 0; j < 113; ++j) {
float expected = -1e10f;
for (int k = 0; k < 53; ++k) {
for (int l = 0; l < 72; ++l) {
expected = (std::max)(expected, in(l, k, i, j));
}
}
VERIFY_IS_EQUAL(out(i, j), expected);
}
}
}
template <int DataLayout>
static void test_innermost_first_dims() {
Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
Tensor<float, 2, DataLayout> out(72, 53);
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 2;
reduction_axis[1] = 3;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>> reduction_axis;
#endif
out = in.maximum(reduction_axis);
for (int i = 0; i < 72; ++i) {
for (int j = 0; j < 53; ++j) {
float expected = -1e10f;
for (int k = 0; k < 97; ++k) {
for (int l = 0; l < 113; ++l) {
expected = (std::max)(expected, in(i, j, k, l));
}
}
VERIFY_IS_EQUAL(out(i, j), expected);
}
}
}
template <int DataLayout>
static void test_reduce_middle_dims() {
Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
Tensor<float, 2, DataLayout> out(72, 53);
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 2;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>> reduction_axis;
#endif
out = in.maximum(reduction_axis);
for (int i = 0; i < 72; ++i) {
for (int j = 0; j < 113; ++j) {
float expected = -1e10f;
for (int k = 0; k < 53; ++k) {
for (int l = 0; l < 97; ++l) {
expected = (std::max)(expected, in(i, k, l, j));
}
}
VERIFY_IS_EQUAL(out(i, j), expected);
}
}
}
static void test_sum_accuracy() {
Tensor<float, 3> tensor(101, 101, 101);
for (float prescribed_mean : {1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f}) {
tensor.setRandom();
tensor += tensor.constant(prescribed_mean);
Tensor<float, 0> sum = tensor.sum();
double expected_sum = 0.0;
for (int i = 0; i < 101; ++i) {
for (int j = 0; j < 101; ++j) {
for (int k = 0; k < 101; ++k) {
expected_sum += static_cast<double>(tensor(i, j, k));
}
}
}
VERIFY_IS_APPROX(sum(), static_cast<float>(expected_sum));
}
}
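// test_sum_accuracy targets the classic failure mode of naive float
// accumulation: with 101^3 addends near a large mean, the running sum dwarfs
// each term and low-order bits are lost. The reference above sidesteps this by
// accumulating in double; compensated (Kahan) summation is another standard
// fix, sketched here for illustration only:
static double kahan_sum_sketch(const Tensor<float, 3>& t) {
  double sum = 0.0;
  double c = 0.0;  // running compensation: the low-order bits lost so far
  for (int i = 0; i < t.dimension(0); ++i) {
    for (int j = 0; j < t.dimension(1); ++j) {
      for (int k = 0; k < t.dimension(2); ++k) {
        const double y = static_cast<double>(t(i, j, k)) - c;
        const double s = sum + y;
        c = (s - sum) - y;  // what was rounded away when forming s
        sum = s;
      }
    }
  }
  return sum;
}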
EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
CALL_SUBTEST(test_trivial_reductions<ColMajor>());
CALL_SUBTEST(test_trivial_reductions<RowMajor>());
CALL_SUBTEST(( test_simple_reductions<float,ColMajor>() ));
CALL_SUBTEST(( test_simple_reductions<float,RowMajor>() ));
CALL_SUBTEST(( test_simple_reductions<Eigen::half,ColMajor>() ));
CALL_SUBTEST(( test_simple_reductions<Eigen::bfloat16,ColMajor>() ));
CALL_SUBTEST(test_reductions_in_expr<ColMajor>());
CALL_SUBTEST(test_reductions_in_expr<RowMajor>());
CALL_SUBTEST(test_full_reductions<ColMajor>());
CALL_SUBTEST(test_full_reductions<RowMajor>());
CALL_SUBTEST(test_user_defined_reductions<ColMajor>());
CALL_SUBTEST(test_user_defined_reductions<RowMajor>());
CALL_SUBTEST(test_tensor_maps<ColMajor>());
CALL_SUBTEST(test_tensor_maps<RowMajor>());
CALL_SUBTEST(test_static_dims<ColMajor>());
CALL_SUBTEST(test_static_dims<RowMajor>());
CALL_SUBTEST(test_innermost_last_dims<ColMajor>());
CALL_SUBTEST(test_innermost_last_dims<RowMajor>());
CALL_SUBTEST(test_innermost_first_dims<ColMajor>());
CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
CALL_SUBTEST(test_sum_accuracy());
}

View File

@@ -0,0 +1,154 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
template<typename Type, int DataLayout>
static void test_full_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
const int num_rows = internal::random<int>(1024, 5*1024);
const int num_cols = internal::random<int>(1024, 5*1024);
Tensor<Type, 2, DataLayout> in(num_rows, num_cols);
in.setRandom();
Tensor<Type, 0, DataLayout> full_redux;
full_redux = in.sum();
std::size_t in_bytes = in.size() * sizeof(Type);
std::size_t out_bytes = full_redux.size() * sizeof(Type);
Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes));
Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes));
gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr);
out_gpu.device(gpu_device) = in_gpu.sum();
Tensor<Type, 0, DataLayout> full_redux_gpu;
gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
gpu_device.synchronize();
// Check that the CPU and GPU reductions return the same result.
VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
gpu_device.deallocate(gpu_in_ptr);
gpu_device.deallocate(gpu_out_ptr);
}
template<typename Type, int DataLayout>
static void test_first_dim_reductions() {
int dim_x = 33;
int dim_y = 1;
int dim_z = 128;
Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
in.setRandom();
Eigen::array<int, 1> red_axis;
red_axis[0] = 0;
Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
// Create device
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice dev(&stream);
// Create data(T)
Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type));
Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_y, dim_z);
// Perform operation
dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
gpu_out.device(dev) = gpu_in.sum(red_axis);
gpu_out.device(dev) += gpu_in.sum(red_axis);
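  // The = followed by += leaves twice the reduction in out_data (here and in
  // the last-dim test below), which is why the host-side checks compare
  // redux_gpu against 2*redux rather than redux itself.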
Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z);
dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
dev.synchronize();
// Check that the CPU and GPU reductions return the same result.
for (int i = 0; i < gpu_out.size(); ++i) {
VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
}
dev.deallocate(in_data);
dev.deallocate(out_data);
}
template<typename Type, int DataLayout>
static void test_last_dim_reductions() {
int dim_x = 128;
int dim_y = 1;
int dim_z = 33;
Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
in.setRandom();
Eigen::array<int, 1> red_axis;
red_axis[0] = 2;
Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
// Create device
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice dev(&stream);
// Create data
Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type));
Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y);
// Perform operation
dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
gpu_out.device(dev) = gpu_in.sum(red_axis);
gpu_out.device(dev) += gpu_in.sum(red_axis);
Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y);
dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
dev.synchronize();
// Check that the CPU and GPU reductions return the same result.
for (int i = 0; i < gpu_out.size(); ++i) {
VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
}
dev.deallocate(in_data);
dev.deallocate(out_data);
}
EIGEN_DECLARE_TEST(cxx11_tensor_reduction_gpu) {
CALL_SUBTEST_1((test_full_reductions<float, ColMajor>()));
CALL_SUBTEST_1((test_full_reductions<double, ColMajor>()));
CALL_SUBTEST_2((test_full_reductions<float, RowMajor>()));
CALL_SUBTEST_2((test_full_reductions<double, RowMajor>()));
CALL_SUBTEST_3((test_first_dim_reductions<float, ColMajor>()));
CALL_SUBTEST_3((test_first_dim_reductions<double, ColMajor>()));
CALL_SUBTEST_4((test_first_dim_reductions<float, RowMajor>()));
// Outer reductions of doubles aren't supported just yet.
// CALL_SUBTEST_4((test_first_dim_reductions<double, RowMajor>()))
CALL_SUBTEST_5((test_last_dim_reductions<float, ColMajor>()));
// Outer reductions of doubles aren't supported just yet.
// CALL_SUBTEST_5((test_last_dim_reductions<double, ColMajor>()));
CALL_SUBTEST_6((test_last_dim_reductions<float, RowMajor>()));
CALL_SUBTEST_6((test_last_dim_reductions<double, RowMajor>()));
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,248 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::RowMajor;
static void test_simple_lvalue_ref()
{
Tensor<int, 1> input(6);
input.setRandom();
TensorRef<Tensor<int, 1>> ref3(input);
TensorRef<Tensor<int, 1>> ref4 = input;
VERIFY_IS_EQUAL(ref3.data(), input.data());
VERIFY_IS_EQUAL(ref4.data(), input.data());
for (int i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(ref3(i), input(i));
VERIFY_IS_EQUAL(ref4(i), input(i));
}
for (int i = 0; i < 6; ++i) {
ref3.coeffRef(i) = i;
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(input(i), i);
}
for (int i = 0; i < 6; ++i) {
ref4.coeffRef(i) = -i * 2;
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(input(i), -i*2);
}
}
static void test_simple_rvalue_ref()
{
Tensor<int, 1> input1(6);
input1.setRandom();
Tensor<int, 1> input2(6);
input2.setRandom();
TensorRef<Tensor<int, 1>> ref3(input1 + input2);
TensorRef<Tensor<int, 1>> ref4 = input1 + input2;
VERIFY_IS_NOT_EQUAL(ref3.data(), input1.data());
VERIFY_IS_NOT_EQUAL(ref4.data(), input1.data());
VERIFY_IS_NOT_EQUAL(ref3.data(), input2.data());
VERIFY_IS_NOT_EQUAL(ref4.data(), input2.data());
for (int i = 0; i < 6; ++i) {
VERIFY_IS_EQUAL(ref3(i), input1(i) + input2(i));
VERIFY_IS_EQUAL(ref4(i), input1(i) + input2(i));
}
}
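// The pointer inequalities above are the point of this test: a TensorRef bound
// to an expression (rather than to a concrete tensor) evaluates it into its
// own buffer, so its data() can never alias the operands. A minimal sketch of
// the contrast with the lvalue case:
static void test_ref_aliasing_sketch()
{
  Tensor<int, 1> a(3);
  Tensor<int, 1> b(3);
  a.setRandom();
  b.setRandom();
  TensorRef<Tensor<int, 1>> view(a);       // lvalue: shares a's storage
  TensorRef<Tensor<int, 1>> owned(a + b);  // rvalue: evaluated into fresh storage
  VERIFY_IS_EQUAL(view.data(), a.data());
  VERIFY_IS_NOT_EQUAL(owned.data(), a.data());
}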
static void test_multiple_dims()
{
Tensor<float, 3> input(3,5,7);
input.setRandom();
TensorRef<Tensor<float, 3>> ref(input);
VERIFY_IS_EQUAL(ref.data(), input.data());
VERIFY_IS_EQUAL(ref.dimension(0), 3);
VERIFY_IS_EQUAL(ref.dimension(1), 5);
VERIFY_IS_EQUAL(ref.dimension(2), 7);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(ref(i,j,k), input(i,j,k));
}
}
}
}
static void test_slice()
{
Tensor<float, 5> tensor(2,3,5,7,11);
tensor.setRandom();
Eigen::DSizes<ptrdiff_t, 5> indices(1,2,3,4,5);
Eigen::DSizes<ptrdiff_t, 5> sizes(1,1,1,1,1);
TensorRef<Tensor<float, 5>> slice = tensor.slice(indices, sizes);
VERIFY_IS_EQUAL(slice(0,0,0,0,0), tensor(1,2,3,4,5));
Eigen::DSizes<ptrdiff_t, 5> indices2(1,1,3,4,5);
Eigen::DSizes<ptrdiff_t, 5> sizes2(1,1,2,2,3);
slice = tensor.slice(indices2, sizes2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 3; ++k) {
VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
}
}
}
Eigen::DSizes<ptrdiff_t, 5> indices3(0,0,0,0,0);
Eigen::DSizes<ptrdiff_t, 5> sizes3(2,3,1,1,1);
slice = tensor.slice(indices3, sizes3);
VERIFY_IS_EQUAL(slice.data(), tensor.data());
}
static void test_ref_of_ref()
{
Tensor<float, 3> input(3,5,7);
input.setRandom();
TensorRef<Tensor<float, 3>> ref(input);
TensorRef<Tensor<float, 3>> ref_of_ref(ref);
TensorRef<Tensor<float, 3>> ref_of_ref2;
ref_of_ref2 = ref;
VERIFY_IS_EQUAL(ref_of_ref.data(), input.data());
VERIFY_IS_EQUAL(ref_of_ref.dimension(0), 3);
VERIFY_IS_EQUAL(ref_of_ref.dimension(1), 5);
VERIFY_IS_EQUAL(ref_of_ref.dimension(2), 7);
VERIFY_IS_EQUAL(ref_of_ref2.data(), input.data());
VERIFY_IS_EQUAL(ref_of_ref2.dimension(0), 3);
VERIFY_IS_EQUAL(ref_of_ref2.dimension(1), 5);
VERIFY_IS_EQUAL(ref_of_ref2.dimension(2), 7);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(ref_of_ref(i,j,k), input(i,j,k));
VERIFY_IS_EQUAL(ref_of_ref2(i,j,k), input(i,j,k));
}
}
}
}
static void test_ref_in_expr()
{
Tensor<float, 3> input(3,5,7);
input.setRandom();
TensorRef<Tensor<float, 3>> input_ref(input);
Tensor<float, 3> result(3,5,7);
result.setRandom();
TensorRef<Tensor<float, 3>> result_ref(result);
Tensor<float, 3> bias(3,5,7);
bias.setRandom();
result_ref = input_ref + bias;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(result_ref(i,j,k), input(i,j,k) + bias(i,j,k));
VERIFY_IS_NOT_EQUAL(result(i,j,k), input(i,j,k) + bias(i,j,k));
}
}
}
result = result_ref;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_EQUAL(result(i,j,k), input(i,j,k) + bias(i,j,k));
}
}
}
}
static void test_coeff_ref()
{
Tensor<float, 5> tensor(2,3,5,7,11);
tensor.setRandom();
Tensor<float, 5> original = tensor;
TensorRef<Tensor<float, 4>> slice = tensor.chip(7, 4);
slice.coeffRef(0, 0, 0, 0) = 1.0f;
slice.coeffRef(1, 0, 0, 0) += 2.0f;
VERIFY_IS_EQUAL(tensor(0,0,0,0,7), 1.0f);
VERIFY_IS_EQUAL(tensor(1,0,0,0,7), original(1,0,0,0,7) + 2.0f);
}
static void test_nested_ops_with_ref()
{
Tensor<float, 4> t(2, 3, 5, 7);
t.setRandom();
TensorMap<Tensor<const float, 4> > m(t.data(), 2, 3, 5, 7);
array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
paddings[0] = std::make_pair(0, 0);
paddings[1] = std::make_pair(2, 1);
paddings[2] = std::make_pair(3, 4);
paddings[3] = std::make_pair(0, 0);
DSizes<Eigen::DenseIndex, 4> shuffle_dims(0, 1, 2, 3);
TensorRef<Tensor<const float, 4> > ref(m.pad(paddings));
array<std::pair<ptrdiff_t, ptrdiff_t>, 4> trivial;
trivial[0] = std::make_pair(0, 0);
trivial[1] = std::make_pair(0, 0);
trivial[2] = std::make_pair(0, 0);
trivial[3] = std::make_pair(0, 0);
Tensor<float, 4> padded = ref.shuffle(shuffle_dims).pad(trivial);
VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 6; ++j) {
for (int k = 0; k < 12; ++k) {
for (int l = 0; l < 7; ++l) {
if (j >= 2 && j < 5 && k >= 3 && k < 8) {
VERIFY_IS_EQUAL(padded(i,j,k,l), t(i,j-2,k-3,l));
} else {
VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
}
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_ref)
{
CALL_SUBTEST(test_simple_lvalue_ref());
CALL_SUBTEST(test_simple_rvalue_ref());
CALL_SUBTEST(test_multiple_dims());
CALL_SUBTEST(test_slice());
CALL_SUBTEST(test_ref_of_ref());
CALL_SUBTEST(test_ref_in_expr());
CALL_SUBTEST(test_coeff_ref());
CALL_SUBTEST(test_nested_ops_with_ref());
}

View File

@@ -0,0 +1,190 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com> and
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::array;
template <int DataLayout>
static void test_simple_reverse()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<bool, 4> dim_rev;
dim_rev[0] = false;
dim_rev[1] = true;
dim_rev[2] = true;
dim_rev[3] = false;
Tensor<float, 4, DataLayout> reversed_tensor;
reversed_tensor = tensor.reverse(dim_rev);
VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2);
VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3);
VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5);
VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l));
}
}
}
}
dim_rev[0] = true;
dim_rev[1] = false;
dim_rev[2] = false;
dim_rev[3] = false;
reversed_tensor = tensor.reverse(dim_rev);
VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2);
VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3);
VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5);
VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l));
}
}
}
}
dim_rev[0] = true;
dim_rev[1] = false;
dim_rev[2] = false;
dim_rev[3] = true;
reversed_tensor = tensor.reverse(dim_rev);
VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2);
VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3);
VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5);
VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l));
}
}
}
}
}
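// The verification loops above encode the reverse() contract: when dim_rev[d]
// is set, output index i on dimension d reads input index dim(d) - 1 - i, and
// unset dimensions pass through unchanged. The same mapping in one dimension:
template <int DataLayout>
static void test_reverse_mapping_sketch()
{
  Tensor<float, 1, DataLayout> v(5);
  v.setRandom();
  array<bool, 1> rev;
  rev[0] = true;
  Tensor<float, 1, DataLayout> r = v.reverse(rev);
  for (int i = 0; i < 5; ++i) {
    VERIFY_IS_EQUAL(r(i), v(4 - i));  // dim(0) - 1 - i
  }
}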
template <int DataLayout>
static void test_expr_reverse(bool LValue)
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<bool, 4> dim_rev;
dim_rev[0] = false;
dim_rev[1] = true;
dim_rev[2] = false;
dim_rev[3] = true;
Tensor<float, 4, DataLayout> expected(2, 3, 5, 7);
if (LValue) {
expected.reverse(dim_rev) = tensor;
} else {
expected = tensor.reverse(dim_rev);
}
Tensor<float, 4, DataLayout> result(2,3,5,7);
array<ptrdiff_t, 4> src_slice_dim;
src_slice_dim[0] = 2;
src_slice_dim[1] = 3;
src_slice_dim[2] = 1;
src_slice_dim[3] = 7;
array<ptrdiff_t, 4> src_slice_start;
src_slice_start[0] = 0;
src_slice_start[1] = 0;
src_slice_start[2] = 0;
src_slice_start[3] = 0;
array<ptrdiff_t, 4> dst_slice_dim = src_slice_dim;
array<ptrdiff_t, 4> dst_slice_start = src_slice_start;
for (int i = 0; i < 5; ++i) {
if (LValue) {
result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) =
tensor.slice(src_slice_start, src_slice_dim);
} else {
result.slice(dst_slice_start, dst_slice_dim) =
tensor.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
}
src_slice_start[2] += 1;
dst_slice_start[2] += 1;
}
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 3);
VERIFY_IS_EQUAL(result.dimension(2), 5);
VERIFY_IS_EQUAL(result.dimension(3), 7);
for (int i = 0; i < expected.dimension(0); ++i) {
for (int j = 0; j < expected.dimension(1); ++j) {
for (int k = 0; k < expected.dimension(2); ++k) {
for (int l = 0; l < expected.dimension(3); ++l) {
VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
}
}
}
}
dst_slice_start[2] = 0;
result.setRandom();
for (int i = 0; i < 5; ++i) {
if (LValue) {
result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) =
tensor.slice(dst_slice_start, dst_slice_dim);
} else {
result.slice(dst_slice_start, dst_slice_dim) =
tensor.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
}
dst_slice_start[2] += 1;
}
for (int i = 0; i < expected.dimension(0); ++i) {
for (int j = 0; j < expected.dimension(1); ++j) {
for (int k = 0; k < expected.dimension(2); ++k) {
for (int l = 0; l < expected.dimension(3); ++l) {
VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_reverse)
{
CALL_SUBTEST(test_simple_reverse<ColMajor>());
CALL_SUBTEST(test_simple_reverse<RowMajor>());
CALL_SUBTEST(test_expr_reverse<ColMajor>(true));
CALL_SUBTEST(test_expr_reverse<RowMajor>(true));
CALL_SUBTEST(test_expr_reverse<ColMajor>(false));
CALL_SUBTEST(test_expr_reverse<RowMajor>(false));
}

View File

@@ -0,0 +1,253 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) {
IndexType dim1 = 2;
IndexType dim2 = 3;
IndexType dim3 = 5;
IndexType dim4 = 7;
array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange);
tensor.setRandom();
array<bool, 4> dim_rev;
dim_rev[0] = false;
dim_rev[1] = true;
dim_rev[2] = true;
dim_rev[3] = false;
DataType* gpu_in_data = static_cast<DataType*>(
sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
reversed_tensor.dimensions().TotalSize() * sizeof(DataType)));
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data,
tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data,
tensorRange);
sycl_device.memcpyHostToDevice(
gpu_in_data, tensor.data(),
(tensor.dimensions().TotalSize()) * sizeof(DataType));
out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
sycl_device.memcpyDeviceToHost(
reversed_tensor.data(), gpu_out_data,
reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
// Check that the CPU and GPU reductions return the same result.
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
for (IndexType k = 0; k < 5; ++k) {
for (IndexType l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i, j, k, l),
reversed_tensor(i, 2 - j, 4 - k, l));
}
}
}
}
dim_rev[0] = true;
dim_rev[1] = false;
dim_rev[2] = false;
dim_rev[3] = false;
out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
sycl_device.memcpyDeviceToHost(
reversed_tensor.data(), gpu_out_data,
reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
for (IndexType k = 0; k < 5; ++k) {
for (IndexType l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i, j, k, l), reversed_tensor(1 - i, j, k, l));
}
}
}
}
dim_rev[0] = true;
dim_rev[1] = false;
dim_rev[2] = false;
dim_rev[3] = true;
out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
sycl_device.memcpyDeviceToHost(
reversed_tensor.data(), gpu_out_data,
reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
for (IndexType i = 0; i < 2; ++i) {
for (IndexType j = 0; j < 3; ++j) {
for (IndexType k = 0; k < 5; ++k) {
for (IndexType l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i, j, k, l),
reversed_tensor(1 - i, j, k, 6 - l));
}
}
}
}
sycl_device.deallocate(gpu_in_data);
sycl_device.deallocate(gpu_out_data);
}
template <typename DataType, int DataLayout, typename IndexType>
static void test_expr_reverse(const Eigen::SyclDevice& sycl_device,
bool LValue) {
IndexType dim1 = 2;
IndexType dim2 = 3;
IndexType dim3 = 5;
IndexType dim4 = 7;
array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange);
Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange);
tensor.setRandom();
array<bool, 4> dim_rev;
dim_rev[0] = false;
dim_rev[1] = true;
dim_rev[2] = false;
dim_rev[3] = true;
DataType* gpu_in_data = static_cast<DataType*>(
sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
DataType* gpu_out_data_expected = static_cast<DataType*>(sycl_device.allocate(
expected.dimensions().TotalSize() * sizeof(DataType)));
DataType* gpu_out_data_result = static_cast<DataType*>(
sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType)));
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data,
tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(
gpu_out_data_expected, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(
gpu_out_data_result, tensorRange);
sycl_device.memcpyHostToDevice(
gpu_in_data, tensor.data(),
(tensor.dimensions().TotalSize()) * sizeof(DataType));
if (LValue) {
out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu;
} else {
out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev);
}
sycl_device.memcpyDeviceToHost(
expected.data(), gpu_out_data_expected,
expected.dimensions().TotalSize() * sizeof(DataType));
array<IndexType, 4> src_slice_dim;
src_slice_dim[0] = 2;
src_slice_dim[1] = 3;
src_slice_dim[2] = 1;
src_slice_dim[3] = 7;
array<IndexType, 4> src_slice_start;
src_slice_start[0] = 0;
src_slice_start[1] = 0;
src_slice_start[2] = 0;
src_slice_start[3] = 0;
array<IndexType, 4> dst_slice_dim = src_slice_dim;
array<IndexType, 4> dst_slice_start = src_slice_start;
for (IndexType i = 0; i < 5; ++i) {
if (LValue) {
out_gpu_result.slice(dst_slice_start, dst_slice_dim)
.reverse(dim_rev)
.device(sycl_device) = in_gpu.slice(src_slice_start, src_slice_dim);
} else {
out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
}
src_slice_start[2] += 1;
dst_slice_start[2] += 1;
}
sycl_device.memcpyDeviceToHost(
result.data(), gpu_out_data_result,
result.dimensions().TotalSize() * sizeof(DataType));
for (IndexType i = 0; i < expected.dimension(0); ++i) {
for (IndexType j = 0; j < expected.dimension(1); ++j) {
for (IndexType k = 0; k < expected.dimension(2); ++k) {
for (IndexType l = 0; l < expected.dimension(3); ++l) {
VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
}
}
}
}
dst_slice_start[2] = 0;
result.setRandom();
sycl_device.memcpyHostToDevice(
gpu_out_data_result, result.data(),
(result.dimensions().TotalSize()) * sizeof(DataType));
for (IndexType i = 0; i < 5; ++i) {
if (LValue) {
out_gpu_result.slice(dst_slice_start, dst_slice_dim)
.reverse(dim_rev)
.device(sycl_device) = in_gpu.slice(dst_slice_start, dst_slice_dim);
} else {
out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
}
dst_slice_start[2] += 1;
}
sycl_device.memcpyDeviceToHost(
result.data(), gpu_out_data_result,
result.dimensions().TotalSize() * sizeof(DataType));
for (IndexType i = 0; i < expected.dimension(0); ++i) {
for (IndexType j = 0; j < expected.dimension(1); ++j) {
for (IndexType k = 0; k < expected.dimension(2); ++k) {
for (IndexType l = 0; l < expected.dimension(3); ++l) {
VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
}
}
}
}
}
template <typename DataType>
void sycl_reverse_test_per_device(const cl::sycl::device& d) {
QueueInterface queueInterface(d);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device);
test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device);
test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false);
test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false);
test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true);
test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true);
}
EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
std::cout << "Running on "
<< device.get_info<cl::sycl::info::device::name>() << std::endl;
CALL_SUBTEST_1(sycl_reverse_test_per_device<short>(device));
CALL_SUBTEST_2(sycl_reverse_test_per_device<int>(device));
CALL_SUBTEST_3(sycl_reverse_test_per_device<unsigned int>(device));
#ifdef EIGEN_SYCL_DOUBLE_SUPPORT
CALL_SUBTEST_4(sycl_reverse_test_per_device<double>(device));
#endif
CALL_SUBTEST_5(sycl_reverse_test_per_device<float>(device));
}
}

View File

@@ -0,0 +1,62 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
static void test_float_rounding()
{
Tensor<float, 2> ftensor(20,30);
ftensor = ftensor.random() * 100.f;
Tensor<float, 2> result = ftensor.round();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_EQUAL(result(i,j), numext::round(ftensor(i,j)));
}
}
}
static void test_float_flooring()
{
Tensor<float, 2> ftensor(20,30);
ftensor = ftensor.random() * 100.f;
Tensor<float, 2> result = ftensor.floor();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_EQUAL(result(i,j), numext::floor(ftensor(i,j)));
}
}
}
static void test_float_ceiling()
{
Tensor<float, 2> ftensor(20,30);
ftensor = ftensor.random() * 100.f;
Tensor<float, 2> result = ftensor.ceil();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_EQUAL(result(i,j), numext::ceil(ftensor(i,j)));
}
}
}
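// The three ops differ only at non-integers: round() goes to the nearest
// integer, floor() toward negative infinity, ceil() toward positive infinity.
// A tiny worked sketch on negative inputs, where the directions are easiest to
// mix up (values chosen away from exact halves to avoid tie-breaking rules):
static void test_rounding_directions_sketch()
{
  Tensor<float, 1> t(2);
  t(0) = -1.4f;
  t(1) = -1.6f;
  Tensor<float, 1> r = t.round();
  Tensor<float, 1> f = t.floor();
  Tensor<float, 1> c = t.ceil();
  VERIFY_IS_EQUAL(r(0), -1.0f);  // nearest integer to -1.4
  VERIFY_IS_EQUAL(r(1), -2.0f);  // nearest integer to -1.6
  VERIFY_IS_EQUAL(f(0), -2.0f);  // floor always moves toward -infinity
  VERIFY_IS_EQUAL(c(1), -1.0f);  // ceil always moves toward +infinity
}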
EIGEN_DECLARE_TEST(cxx11_tensor_roundings)
{
CALL_SUBTEST(test_float_rounding());
CALL_SUBTEST(test_float_ceiling());
CALL_SUBTEST(test_float_flooring());
}

View File

@@ -0,0 +1,110 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <limits>
#include <numeric>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
template <int DataLayout, typename Type=float, bool Exclusive = false>
static void test_1d_scan()
{
int size = 50;
Tensor<Type, 1, DataLayout> tensor(size);
tensor.setRandom();
Tensor<Type, 1, DataLayout> result = tensor.cumsum(0, Exclusive);
VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0));
float accum = 0;
for (int i = 0; i < size; i++) {
if (Exclusive) {
VERIFY_IS_EQUAL(result(i), accum);
accum += tensor(i);
} else {
accum += tensor(i);
VERIFY_IS_EQUAL(result(i), accum);
}
}
accum = 1;
result = tensor.cumprod(0, Exclusive);
for (int i = 0; i < size; i++) {
if (Exclusive) {
VERIFY_IS_EQUAL(result(i), accum);
accum *= tensor(i);
} else {
accum *= tensor(i);
VERIFY_IS_EQUAL(result(i), accum);
}
}
}
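// The Exclusive flag shifts the scan by one position: an inclusive cumsum of
// {1, 2, 3} is {1, 3, 6}, while the exclusive variant is {0, 1, 3}, each entry
// summing only the values strictly before it. A tiny worked sketch:
static void test_exclusive_vs_inclusive_sketch()
{
  Tensor<float, 1> v(3);
  v(0) = 1.0f; v(1) = 2.0f; v(2) = 3.0f;
  Tensor<float, 1> inc = v.cumsum(0, /*exclusive=*/false);
  Tensor<float, 1> exc = v.cumsum(0, /*exclusive=*/true);
  VERIFY_IS_EQUAL(inc(2), 6.0f);  // 1 + 2 + 3
  VERIFY_IS_EQUAL(exc(2), 3.0f);  // 1 + 2, the prefix before index 2
  VERIFY_IS_EQUAL(exc(0), 0.0f);  // empty prefix
}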
template <int DataLayout, typename Type=float>
static void test_4d_scan()
{
int size = 5;
Tensor<Type, 4, DataLayout> tensor(size, size, size, size);
tensor.setRandom();
Tensor<Type, 4, DataLayout> result(size, size, size, size);
result = tensor.cumsum(0);
float accum = 0;
for (int i = 0; i < size; i++) {
accum += tensor(i, 1, 2, 3);
VERIFY_IS_EQUAL(result(i, 1, 2, 3), accum);
}
result = tensor.cumsum(1);
accum = 0;
for (int i = 0; i < size; i++) {
accum += tensor(1, i, 2, 3);
VERIFY_IS_EQUAL(result(1, i, 2, 3), accum);
}
result = tensor.cumsum(2);
accum = 0;
for (int i = 0; i < size; i++) {
accum += tensor(1, 2, i, 3);
VERIFY_IS_EQUAL(result(1, 2, i, 3), accum);
}
result = tensor.cumsum(3);
accum = 0;
for (int i = 0; i < size; i++) {
accum += tensor(1, 2, 3, i);
VERIFY_IS_EQUAL(result(1, 2, 3, i), accum);
}
}
template <int DataLayout>
static void test_tensor_maps() {
int inputs[20];
TensorMap<Tensor<int, 1, DataLayout> > tensor_map(inputs, 20);
tensor_map.setRandom();
Tensor<int, 1, DataLayout> result = tensor_map.cumsum(0);
int accum = 0;
for (int i = 0; i < 20; ++i) {
accum += tensor_map(i);
VERIFY_IS_EQUAL(result(i), accum);
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_scan) {
CALL_SUBTEST((test_1d_scan<ColMajor, float, true>()));
CALL_SUBTEST((test_1d_scan<ColMajor, float, false>()));
CALL_SUBTEST((test_1d_scan<RowMajor, float, true>()));
CALL_SUBTEST((test_1d_scan<RowMajor, float, false>()));
CALL_SUBTEST(test_4d_scan<ColMajor>());
CALL_SUBTEST(test_4d_scan<RowMajor>());
CALL_SUBTEST(test_tensor_maps<ColMajor>());
CALL_SUBTEST(test_tensor_maps<RowMajor>());
}

View File

@@ -0,0 +1,78 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
void test_gpu_cumsum(int m_size, int k_size, int n_size)
{
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
Tensor<float, 3, DataLayout> t_input(m_size, k_size, n_size);
Tensor<float, 3, DataLayout> t_result(m_size, k_size, n_size);
Tensor<float, 3, DataLayout> t_result_gpu(m_size, k_size, n_size);
t_input.setRandom();
std::size_t t_input_bytes = t_input.size() * sizeof(float);
std::size_t t_result_bytes = t_result.size() * sizeof(float);
float* d_t_input;
float* d_t_result;
gpuMalloc((void**)(&d_t_input), t_input_bytes);
gpuMalloc((void**)(&d_t_result), t_result_bytes);
gpuMemcpy(d_t_input, t_input.data(), t_input_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> >
gpu_t_input(d_t_input, Eigen::array<int, 3>(m_size, k_size, n_size));
Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> >
gpu_t_result(d_t_result, Eigen::array<int, 3>(m_size, k_size, n_size));
gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1);
t_result = t_input.cumsum(1);
gpuMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, gpuMemcpyDeviceToHost);
for (DenseIndex i = 0; i < t_result.size(); i++) {
if (std::fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) {
continue;
}
if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) {
continue;
}
std::cout << "mismatch detected at index " << i << ": " << t_result(i)
<< " vs " << t_result_gpu(i) << std::endl;
assert(false);
}
gpuFree((void*)d_t_input);
gpuFree((void*)d_t_result);
}
EIGEN_DECLARE_TEST(cxx11_tensor_scan_gpu)
{
CALL_SUBTEST_1(test_gpu_cumsum<ColMajor>(128, 128, 128));
CALL_SUBTEST_2(test_gpu_cumsum<RowMajor>(128, 128, 128));
}

View File

@@ -0,0 +1,141 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
template <typename DataType, int DataLayout, typename IndexType>
void test_sycl_cumsum(const Eigen::SyclDevice& sycl_device, IndexType m_size,
IndexType k_size, IndexType n_size, int consume_dim,
bool exclusive) {
static const DataType error_threshold = 1e-4f;
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size
<< " consume_dim : " << consume_dim << ")" << std::endl;
Tensor<DataType, 3, DataLayout, IndexType> t_input(m_size, k_size, n_size);
Tensor<DataType, 3, DataLayout, IndexType> t_result(m_size, k_size, n_size);
Tensor<DataType, 3, DataLayout, IndexType> t_result_gpu(m_size, k_size,
n_size);
t_input.setRandom();
std::size_t t_input_bytes = t_input.size() * sizeof(DataType);
std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
DataType* gpu_data_in =
static_cast<DataType*>(sycl_device.allocate(t_input_bytes));
DataType* gpu_data_out =
static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
array<IndexType, 3> tensorRange = {{m_size, k_size, n_size}};
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_t_input(
gpu_data_in, tensorRange);
TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_t_result(
gpu_data_out, tensorRange);
sycl_device.memcpyHostToDevice(gpu_data_in, t_input.data(), t_input_bytes);
sycl_device.memcpyHostToDevice(gpu_data_out, t_input.data(), t_input_bytes);
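// The output buffer is also primed with the input data, presumably so the
// scan kernel starts from initialized device memory.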
gpu_t_result.device(sycl_device) = gpu_t_input.cumsum(consume_dim, exclusive);
t_result = t_input.cumsum(consume_dim, exclusive);
sycl_device.memcpyDeviceToHost(t_result_gpu.data(), gpu_data_out,
t_result_bytes);
sycl_device.synchronize();
for (IndexType i = 0; i < t_result.size(); i++) {
if (std::fabs(t_result(i) - t_result_gpu(i)) < error_threshold) {
continue;
}
if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i),
error_threshold)) {
continue;
}
std::cout << "mismatch detected at index " << i << " CPU : " << t_result(i)
<< " vs SYCL : " << t_result_gpu(i) << std::endl;
assert(false);
}
sycl_device.deallocate(gpu_data_in);
sycl_device.deallocate(gpu_data_out);
}
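// One helper per (scan dimension, exclusive/inclusive) combination; the
// non-power-of-two extents (2049, 1023, 127) likely exercise partial
// work-groups and boundary handling.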
template <typename DataType, typename Dev>
void sycl_scan_test_exclusive_dim0_per_device(const Dev& sycl_device) {
test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
true);
test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
true);
}
template <typename DataType, typename Dev>
void sycl_scan_test_exclusive_dim1_per_device(const Dev& sycl_device) {
test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
true);
test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
true);
}
template <typename DataType, typename Dev>
void sycl_scan_test_exclusive_dim2_per_device(const Dev& sycl_device) {
test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
true);
test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
true);
}
template <typename DataType, typename Dev>
void sycl_scan_test_inclusive_dim0_per_device(const Dev& sycl_device) {
test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
false);
test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
false);
}
template <typename DataType, typename Dev>
void sycl_scan_test_inclusive_dim1_per_device(const Dev& sycl_device) {
test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
false);
test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
false);
}
template <typename DataType, typename Dev>
void sycl_scan_test_inclusive_dim2_per_device(const Dev& sycl_device) {
test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
false);
test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
false);
}
EIGEN_DECLARE_TEST(cxx11_tensor_scan_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
std::cout << "Running on "
<< device.template get_info<cl::sycl::info::device::name>()
<< std::endl;
QueueInterface queueInterface(device);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
CALL_SUBTEST_1(
sycl_scan_test_exclusive_dim0_per_device<float>(sycl_device));
CALL_SUBTEST_2(
sycl_scan_test_exclusive_dim1_per_device<float>(sycl_device));
CALL_SUBTEST_3(
sycl_scan_test_exclusive_dim2_per_device<float>(sycl_device));
CALL_SUBTEST_4(
sycl_scan_test_inclusive_dim0_per_device<float>(sycl_device));
CALL_SUBTEST_5(
sycl_scan_test_inclusive_dim1_per_device<float>(sycl_device));
CALL_SUBTEST_6(
sycl_scan_test_inclusive_dim2_per_device<float>(sycl_device));
}
}

View File

@@ -0,0 +1,283 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
using Eigen::array;
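// Tests for tensor.shuffle(), which returns an expression with the tensor's
// dimensions permuted.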
template <int DataLayout>
static void test_simple_shuffling()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> shuffles;
shuffles[0] = 0;
shuffles[1] = 1;
shuffles[2] = 2;
shuffles[3] = 3;
Tensor<float, 4, DataLayout> no_shuffle;
no_shuffle = tensor.shuffle(shuffles);
VERIFY_IS_EQUAL(no_shuffle.dimension(0), 2);
VERIFY_IS_EQUAL(no_shuffle.dimension(1), 3);
VERIFY_IS_EQUAL(no_shuffle.dimension(2), 5);
VERIFY_IS_EQUAL(no_shuffle.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
}
}
}
}
shuffles[0] = 2;
shuffles[1] = 3;
shuffles[2] = 1;
shuffles[3] = 0;
Tensor<float, 4, DataLayout> shuffle;
shuffle = tensor.shuffle(shuffles);
VERIFY_IS_EQUAL(shuffle.dimension(0), 5);
VERIFY_IS_EQUAL(shuffle.dimension(1), 7);
VERIFY_IS_EQUAL(shuffle.dimension(2), 3);
VERIFY_IS_EQUAL(shuffle.dimension(3), 2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
}
}
}
}
}
template <int DataLayout>
static void test_expr_shuffling()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> shuffles;
shuffles[0] = 2;
shuffles[1] = 3;
shuffles[2] = 1;
shuffles[3] = 0;
Tensor<float, 4, DataLayout> expected;
expected = tensor.shuffle(shuffles);
Tensor<float, 4, DataLayout> result(5, 7, 3, 2);
array<ptrdiff_t, 4> src_slice_dim{{2, 3, 1, 7}};
array<ptrdiff_t, 4> src_slice_start{{0, 0, 0, 0}};
array<ptrdiff_t, 4> dst_slice_dim{{1, 7, 3, 2}};
array<ptrdiff_t, 4> dst_slice_start{{0, 0, 0, 0}};
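// Shuffle the source one slice along dimension 2 at a time, writing each
// piece into the matching slice along dimension 0 of the destination.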
for (int i = 0; i < 5; ++i) {
result.slice(dst_slice_start, dst_slice_dim) =
tensor.slice(src_slice_start, src_slice_dim).shuffle(shuffles);
src_slice_start[2] += 1;
dst_slice_start[0] += 1;
}
VERIFY_IS_EQUAL(result.dimension(0), 5);
VERIFY_IS_EQUAL(result.dimension(1), 7);
VERIFY_IS_EQUAL(result.dimension(2), 3);
VERIFY_IS_EQUAL(result.dimension(3), 2);
for (int i = 0; i < expected.dimension(0); ++i) {
for (int j = 0; j < expected.dimension(1); ++j) {
for (int k = 0; k < expected.dimension(2); ++k) {
for (int l = 0; l < expected.dimension(3); ++l) {
VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
}
}
}
}
dst_slice_start[0] = 0;
result.setRandom();
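// Same check, but this time the slice is taken of the shuffled expression
// on the right-hand side.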
for (int i = 0; i < 5; ++i) {
result.slice(dst_slice_start, dst_slice_dim) =
tensor.shuffle(shuffles).slice(dst_slice_start, dst_slice_dim);
dst_slice_start[0] += 1;
}
for (int i = 0; i < expected.dimension(0); ++i) {
for (int j = 0; j < expected.dimension(1); ++j) {
for (int k = 0; k < expected.dimension(2); ++k) {
for (int l = 0; l < expected.dimension(3); ++l) {
VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
}
}
}
}
}
template <int DataLayout>
static void test_shuffling_as_value()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
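// shuffle() can also be the target of an assignment: writing through it
// scatters the right-hand side into permuted positions.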
array<ptrdiff_t, 4> shuffles;
shuffles[2] = 0;
shuffles[3] = 1;
shuffles[1] = 2;
shuffles[0] = 3;
Tensor<float, 4, DataLayout> shuffle(5,7,3,2);
shuffle.shuffle(shuffles) = tensor;
VERIFY_IS_EQUAL(shuffle.dimension(0), 5);
VERIFY_IS_EQUAL(shuffle.dimension(1), 7);
VERIFY_IS_EQUAL(shuffle.dimension(2), 3);
VERIFY_IS_EQUAL(shuffle.dimension(3), 2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
}
}
}
}
array<ptrdiff_t, 4> no_shuffle;
no_shuffle[0] = 0;
no_shuffle[1] = 1;
no_shuffle[2] = 2;
no_shuffle[3] = 3;
Tensor<float, 4, DataLayout> shuffle2(5,7,3,2);
shuffle2.shuffle(shuffles) = tensor.shuffle(no_shuffle);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 7; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 2; ++l) {
VERIFY_IS_EQUAL(shuffle2(i,j,k,l), shuffle(i,j,k,l));
}
}
}
}
}
template <int DataLayout>
static void test_shuffle_unshuffle()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
// Choose a random permutation.
array<ptrdiff_t, 4> shuffles;
for (int i = 0; i < 4; ++i) {
shuffles[i] = i;
}
array<ptrdiff_t, 4> shuffles_inverse;
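// Fisher-Yates shuffle that records the inverse permutation as it is built.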
for (int i = 0; i < 4; ++i) {
const ptrdiff_t index = internal::random<ptrdiff_t>(i, 3);
shuffles_inverse[shuffles[index]] = i;
std::swap(shuffles[i], shuffles[index]);
}
Tensor<float, 4, DataLayout> shuffle;
shuffle = tensor.shuffle(shuffles).shuffle(shuffles_inverse);
VERIFY_IS_EQUAL(shuffle.dimension(0), 2);
VERIFY_IS_EQUAL(shuffle.dimension(1), 3);
VERIFY_IS_EQUAL(shuffle.dimension(2), 5);
VERIFY_IS_EQUAL(shuffle.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(i,j,k,l));
}
}
}
}
}
template <int DataLayout>
static void test_empty_shuffling()
{
Tensor<float, 4, DataLayout> tensor(2,3,0,7);
tensor.setRandom();
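// A zero-sized dimension must propagate correctly through a shuffle;
// the element-wise loops below never execute.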
array<ptrdiff_t, 4> shuffles;
shuffles[0] = 0;
shuffles[1] = 1;
shuffles[2] = 2;
shuffles[3] = 3;
Tensor<float, 4, DataLayout> no_shuffle;
no_shuffle = tensor.shuffle(shuffles);
VERIFY_IS_EQUAL(no_shuffle.dimension(0), 2);
VERIFY_IS_EQUAL(no_shuffle.dimension(1), 3);
VERIFY_IS_EQUAL(no_shuffle.dimension(2), 0);
VERIFY_IS_EQUAL(no_shuffle.dimension(3), 7);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 0; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
}
}
}
}
shuffles[0] = 2;
shuffles[1] = 3;
shuffles[2] = 1;
shuffles[3] = 0;
Tensor<float, 4, DataLayout> shuffle;
shuffle = tensor.shuffle(shuffles);
VERIFY_IS_EQUAL(shuffle.dimension(0), 0);
VERIFY_IS_EQUAL(shuffle.dimension(1), 7);
VERIFY_IS_EQUAL(shuffle.dimension(2), 3);
VERIFY_IS_EQUAL(shuffle.dimension(3), 2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 0; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
}
}
}
}
}
EIGEN_DECLARE_TEST(cxx11_tensor_shuffling)
{
CALL_SUBTEST(test_simple_shuffling<ColMajor>());
CALL_SUBTEST(test_simple_shuffling<RowMajor>());
CALL_SUBTEST(test_expr_shuffling<ColMajor>());
CALL_SUBTEST(test_expr_shuffling<RowMajor>());
CALL_SUBTEST(test_shuffling_as_value<ColMajor>());
CALL_SUBTEST(test_shuffling_as_value<RowMajor>());
CALL_SUBTEST(test_shuffle_unshuffle<ColMajor>());
CALL_SUBTEST(test_shuffle_unshuffle<RowMajor>());
CALL_SUBTEST(test_empty_shuffling<ColMajor>());
CALL_SUBTEST(test_empty_shuffling<RowMajor>());
}

View File

@@ -0,0 +1,117 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
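// Device-side counterpart of the shuffling test: the permutation is evaluated
// on the SYCL device and the results are verified on the host.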
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device) {
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
Tensor<DataType, 4, DataLayout, IndexType> no_shuffle(tensorRange);
tensor.setRandom();
const size_t buffSize = tensor.size() * sizeof(DataType);
array<IndexType, 4> shuffles;
shuffles[0] = 0;
shuffles[1] = 1;
shuffles[2] = 2;
shuffles[3] = 3;
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(buffSize));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(buffSize));
TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu1(gpu_data1,
tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu2(gpu_data2,
tensorRange);
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize);
gpu2.device(sycl_device) = gpu1.shuffle(shuffles);
sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize);
sycl_device.synchronize();
VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1);
VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2);
VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3);
VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim3; ++k) {
for (IndexType l = 0; l < sizeDim4; ++l) {
VERIFY_IS_EQUAL(tensor(i, j, k, l), no_shuffle(i, j, k, l));
}
}
}
}
shuffles[0] = 2;
shuffles[1] = 3;
shuffles[2] = 1;
shuffles[3] = 0;
array<IndexType, 4> tensorRangeShuffle = {
{sizeDim3, sizeDim4, sizeDim2, sizeDim1}};
Tensor<DataType, 4, DataLayout, IndexType> shuffle(tensorRangeShuffle);
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize));
TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu3(
gpu_data3, tensorRangeShuffle);
gpu3.device(sycl_device) = gpu1.shuffle(shuffles);
sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize);
sycl_device.synchronize();
VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3);
VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4);
VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2);
VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1);
for (IndexType i = 0; i < sizeDim1; ++i) {
for (IndexType j = 0; j < sizeDim2; ++j) {
for (IndexType k = 0; k < sizeDim3; ++k) {
for (IndexType l = 0; l < sizeDim4; ++l) {
VERIFY_IS_EQUAL(tensor(i, j, k, l), shuffle(k, l, j, i));
}
}
}
}
}
template <typename DataType, typename dev_Selector>
void sycl_shuffling_test_per_device(dev_Selector s) {
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_shuffling_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device));
}
}

Some files were not shown because too many files have changed in this diff.