ADD: added other eigen lib

Henry Winkel
2022-12-21 16:19:04 +01:00
parent a570766dc6
commit 9e56c7f2c0
832 changed files with 36586 additions and 20006 deletions

View File

@@ -2,7 +2,7 @@ add_subdirectory(Eigen)
if(EIGEN_BUILD_DOC)
add_subdirectory(doc EXCLUDE_FROM_ALL)
endif()
if(BUILD_TESTING)
if(EIGEN_BUILD_TESTING)
if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
else()

View File

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ADLOC_FORWARD
#define EIGEN_ADLOC_FORWARD
#ifndef EIGEN_ADLOC_FORWARD_MODULE_H
#define EIGEN_ADLOC_FORWARD_MODULE_H
//--------------------------------------------------------------------------------
//
@@ -156,4 +156,4 @@ protected:
}
#endif // EIGEN_ADLOC_FORWARD
#endif // EIGEN_ADLOC_FORWARD_MODULE_H

View File

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ALIGNED_VECTOR3
#define EIGEN_ALIGNED_VECTOR3
#ifndef EIGEN_ALIGNED_VECTOR3_MODULE_H
#define EIGEN_ALIGNED_VECTOR3_MODULE_H
#include "../../Eigen/Geometry"
@@ -37,23 +37,23 @@ namespace Eigen {
*
*/
// TODO specialize Cwise
template<typename _Scalar> class AlignedVector3;
template<typename Scalar_> class AlignedVector3;
namespace internal {
template<typename _Scalar> struct traits<AlignedVector3<_Scalar> >
: traits<Matrix<_Scalar,3,1,0,4,1> >
template<typename Scalar_> struct traits<AlignedVector3<Scalar_> >
: traits<Matrix<Scalar_,3,1,0,4,1> >
{
};
}
template<typename _Scalar> class AlignedVector3
: public MatrixBase<AlignedVector3<_Scalar> >
template<typename Scalar_> class AlignedVector3
: public MatrixBase<AlignedVector3<Scalar_> >
{
typedef Matrix<_Scalar,4,1> CoeffType;
typedef Matrix<Scalar_,4,1> CoeffType;
CoeffType m_coeffs;
public:
typedef MatrixBase<AlignedVector3<_Scalar> > Base;
typedef MatrixBase<AlignedVector3<Scalar_> > Base;
EIGEN_DENSE_PUBLIC_INTERFACE(AlignedVector3)
using Base::operator*;
@@ -207,10 +207,10 @@ template<typename _Scalar> class AlignedVector3
namespace internal {
template<typename _Scalar>
struct eval<AlignedVector3<_Scalar>, Dense>
template<typename Scalar_>
struct eval<AlignedVector3<Scalar_>, Dense>
{
typedef const AlignedVector3<_Scalar>& type;
typedef const AlignedVector3<Scalar_>& type;
};
template<typename Scalar>
@@ -231,4 +231,4 @@ struct evaluator<AlignedVector3<Scalar> >
#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_ALIGNED_VECTOR3
#endif // EIGEN_ALIGNED_VECTOR3_MODULE_H

View File

@@ -7,8 +7,10 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_AUTODIFF_MODULE
#define EIGEN_AUTODIFF_MODULE
#ifndef EIGEN_AUTODIFF_MODULE_H
#define EIGEN_AUTODIFF_MODULE_H
#include "../../Eigen/Core"
namespace Eigen {
@@ -43,4 +45,4 @@ namespace Eigen {
//@}
}
#endif // EIGEN_AUTODIFF_MODULE
#endif // EIGEN_AUTODIFF_MODULE_H

View File

@@ -12,6 +12,7 @@ set(Eigen_HEADERS
MatrixFunctions
MoreVectorization
MPRealSupport
NNLS
NonLinearOptimization
NumericalDiff
OpenGLSupport

View File

@@ -8,13 +8,11 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
//#ifndef EIGEN_CXX11_TENSOR_MODULE
//#define EIGEN_CXX11_TENSOR_MODULE
//#ifndef EIGEN_CXX11_TENSOR_MODULE_H
#define EIGEN_CXX11_TENSOR_MODULE_H
#include "../../../Eigen/Core"
#if EIGEN_HAS_CXX11
#include "../SpecialFunctions"
#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
@@ -38,6 +36,8 @@
#include <cmath>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <numeric>
#include <random>
#include <thread>
@@ -76,6 +76,8 @@
#include "src/Tensor/TensorIntDiv.h"
#include "src/Tensor/TensorGlobalFunctions.h"
#include "src/Tensor/TensorIO.h"
#include "src/Tensor/TensorBase.h"
#include "src/Tensor/TensorBlock.h"
@@ -129,9 +131,8 @@
#include "src/Tensor/TensorMap.h"
#include "src/Tensor/TensorRef.h"
#include "src/Tensor/TensorIO.h"
#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_HAS_CXX11
//#endif // EIGEN_CXX11_TENSOR_MODULE
//#endif // EIGEN_CXX11_TENSOR_MODULE_H

View File

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE
#define EIGEN_CXX11_TENSORSYMMETRY_MODULE
#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE_H
#define EIGEN_CXX11_TENSORSYMMETRY_MODULE_H
#include "Tensor"
@@ -35,8 +35,4 @@
#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE
/*
* kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
*/
#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE_H

View File

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_THREADPOOL_MODULE
#define EIGEN_CXX11_THREADPOOL_MODULE
#ifndef EIGEN_CXX11_THREADPOOL_MODULE_H
#define EIGEN_CXX11_THREADPOOL_MODULE_H
#include "../../../Eigen/Core"
@@ -30,7 +30,6 @@
// The code depends on CXX11, so only include the module if the
// compiler supports it.
#if (EIGEN_COMP_CXXVER >= 11)
#include <cstddef>
#include <cstring>
#include <time.h>
@@ -67,8 +66,6 @@
#include "src/ThreadPool/Barrier.h"
#include "src/ThreadPool/NonBlockingThreadPool.h"
#endif
#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CXX11_THREADPOOL_MODULE
#endif // EIGEN_CXX11_THREADPOOL_MODULE_H

View File

@@ -0,0 +1,3 @@
#ifndef EIGEN_CXX11_TENSOR_MODULE_H
#error "Please include unsupported/Eigen/CXX11/Tensor instead of including headers inside the src directory directly."
#endif

View File

@@ -120,9 +120,7 @@ specified position. The value returned is of the datatype of the tensor.
## TensorLayout
The tensor library supports 2 layouts: `ColMajor` (the default) and
`RowMajor`. Only the default column major layout is currently fully
supported, and it is therefore not recommended to attempt to use the row major
layout at the moment.
`RowMajor`.
The layout of a tensor is optionally specified as part of its type. If not
specified explicitly, column major is assumed.
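For illustration, here is a minimal sketch of selecting the layout through the Options template parameter (tensor names and sizes are arbitrary):
Eigen::Tensor<float, 3> col_major(2, 3, 4);                   // ColMajor, the default
Eigen::Tensor<float, 3, Eigen::RowMajor> row_major(2, 3, 4);  // explicit row major layout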
@@ -888,6 +886,23 @@ containing the natural logarithms of the original tensor.
Returns a tensor of the same type and dimensions as the original tensor
containing the absolute values of the original tensor.
### <Operation> arg()
Returns a tensor with the same dimensions as the original tensor
containing the complex argument (phase angle) of the values of the
original tensor.
### <Operation> real()
Returns a tensor with the same dimensions as the original tensor
containing the real part of the complex values of the original tensor.
### <Operation> imag()
Returns a tensor with the same dimensions as the original tensor
containing the imaginary part of the complex values of the original
tensor.
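As a brief, hedged illustration of these complex-valued operations (tensor name, sizes and values are made up):
Eigen::Tensor<std::complex<float>, 2> c(2, 2);
c.setConstant(std::complex<float>(3.0f, 4.0f));
Eigen::Tensor<float, 2> phase = c.arg();   // phase angle of each element
Eigen::Tensor<float, 2> re = c.real();     // real parts
Eigen::Tensor<float, 2> im = c.imag();     // imaginary parts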
### <Operation> pow(Scalar exponent)
Returns a tensor of the same type and dimensions as the original tensor
@@ -1466,9 +1481,9 @@ the input tensor.
Eigen::Tensor<int, 2> a(4, 3);
a.setValues({{0, 100, 200}, {300, 400, 500},
{600, 700, 800}, {900, 1000, 1100}});
Eigen::array<int, 2> offsets = {1, 0};
Eigen::array<int, 2> extents = {2, 2};
Eigen::Tensor<int, 1> slice = a.slice(offsets, extents);
Eigen::array<Eigen::Index, 2> offsets = {1, 0};
Eigen::array<Eigen::Index, 2> extents = {2, 2};
Eigen::Tensor<int, 2> slice = a.slice(offsets, extents);
cout << "a" << endl << a << endl;
=>
a
@@ -1794,6 +1809,45 @@ but you can easily cast the tensors to floats to do the division:
TODO
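A minimal sketch of the cast-to-float workaround mentioned above (a and b are hypothetical integer tensors of matching shape):
Eigen::Tensor<int, 2> a(2, 2), b(2, 2);
a.setConstant(7);
b.setConstant(2);
// cast both operands so the division happens on floats instead of truncating integers
Eigen::Tensor<float, 2> q = a.cast<float>() / b.cast<float>();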
## Tensor Printing
Tensors can be printed into a stream object (e.g. `std::cout`) using different formatting options.
Eigen::Tensor<float, 3> tensor3d = {4, 3, 2};
tensor3d.setValues( {{{1, 2}, {3, 4}, {5, 6}}, {{7, 8}, {9, 10}, {11, 12}}, {{13, 14}, {15, 16}, {17, 18}}, {{19, 20}, {21, 22}, {23, 24}}} );
std::cout << tensor3d.format(Eigen::TensorIOFormat::Plain()) << std::endl;
==>
1 2
3 4
5 6
7 8
9 10
11 12
13 14
15 16
17 18
19 20
21 22
23 24
In the example, we used the predefined format `Eigen::TensorIOFormat::Plain`.
Here is the list of all predefined formats from which you can choose:
- `Eigen::TensorIOFormat::Plain()` for a plain output without braces. Different submatrices are separated by a blank line.
- `Eigen::TensorIOFormat::Numpy()` for numpy-like output.
- `Eigen::TensorIOFormat::Native()` for a C++-like output that can be copy-pasted directly into `setValues()`.
- `Eigen::TensorIOFormat::Legacy()` for a backwards compatible printing of tensors.
If you send the tensor directly to the stream, the default format `Eigen::TensorIOFormat::Plain()` is used (as shown in the sketch below).
You can define your own format by explicitly providing a `Eigen::TensorIOFormat` class instance. Here, you can specify:
- The overall prefix and suffix with `std::string tenPrefix` and `std::string tenSuffix`
- The prefix, separator and suffix for each new element, row, matrix, 3d subtensor, ... with `std::vector<std::string> prefix`, `std::vector<std::string> separator` and `std::vector<std::string> suffix`. Note that the first entry in each of the vectors refers to the last dimension of the tensor, e.g. `separator[0]` will be printed between adjacent elements, `separator[1]` will be printed between adjacent matrices, ...
- `char fill`: the padding character used when elements are aligned.
- `int precision`
- `int flags`: an OR-ed combination of flags; the default value is 0, and the only currently available flag is `Eigen::DontAlignCols`, which disables column alignment and results in faster code.
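For example, switching the tensor3d from above to another predefined format only changes the argument passed to `format()`; streaming the tensor without `format()` falls back to the plain style (a short sketch, output omitted):
std::cout << tensor3d.format(Eigen::TensorIOFormat::Numpy()) << std::endl;  // numpy-like output
std::cout << tensor3d << std::endl;                                         // default: Plain()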
## Representation of scalar values
@@ -1808,8 +1862,3 @@ product of 2 1d tensors (through contractions) returns a 0d tensor.
* The IndexList class requires a cxx11 compliant compiler. You can use an
array of indices instead if you don't have access to a modern compiler.
* On GPUs only floating point values are properly tested and optimized for.
* Complex and integer values are known to be broken on GPUs. If you try to use
them you'll most likely end up triggering a static assertion failure such as
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)

View File

@@ -11,6 +11,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class Tensor
@@ -42,7 +44,8 @@ namespace Eigen {
* \endcode
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN.
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN,
* \c EIGEN_TENSORBASE_PLUGIN, and \c EIGEN_READONLY_TENSORBASE_PLUGIN.
*
* <i><b>Some notes:</b></i>
*
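As a hedged sketch of the plugin mechanism described in the comment above (the plugin file name and the injected member are hypothetical): the file named by the macro is textually included inside the class body, so it can declare extra members.
// my_tensor_plugin.h (hypothetical) could contain, for example:
//   EIGEN_DEVICE_FUNC Scalar firstCoeff() const { return coeff(0); }
#define EIGEN_TENSOR_PLUGIN "my_tensor_plugin.h"
#include <unsupported/Eigen/CXX11/Tensor>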
@@ -73,27 +76,25 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
typedef typename Base::CoeffReturnType CoeffReturnType;
enum {
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign),
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
IsAligned = (EIGEN_MAX_ALIGN_BYTES>0) && !(Options_&DontAlign),
CoordAccess = true,
RawAccess = true
};
static const int Options = Options_;
static const int NumIndices = NumIndices_;
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
static constexpr int Options = Options_;
static constexpr int NumIndices = NumIndices_;
typedef DSizes<Index, NumIndices_> Dimensions;
protected:
TensorStorage<Scalar, Dimensions, Options> m_storage;
#ifdef EIGEN_HAS_SFINAE
template<typename CustomIndices>
struct isOfNormalIndex{
static const bool is_array = internal::is_base_of<array<Index, NumIndices>, CustomIndices>::value;
static const bool is_int = NumTraits<CustomIndices>::IsInteger;
static const bool value = is_array | is_int;
};
#endif
public:
// Metadata
@@ -110,7 +111,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
inline Self& base() { return *this; }
inline const Self& base() const { return *this; }
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
{
@@ -118,7 +118,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
}
#endif
// normal indices
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
@@ -128,7 +127,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
}
// custom indices
#ifdef EIGEN_HAS_SFINAE
template<typename CustomIndices,
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
>
@@ -136,7 +134,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
return coeff(internal::customIndices2Array<Index,NumIndices>(indices));
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const
{
@@ -150,7 +147,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
return m_storage.data()[index];
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
{
@@ -158,7 +154,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
}
#endif
// normal indices
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
@@ -168,7 +163,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
}
// custom indices
#ifdef EIGEN_HAS_SFINAE
template<typename CustomIndices,
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
>
@@ -176,7 +170,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices));
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef()
{
@@ -190,7 +183,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
return m_storage.data()[index];
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
{
@@ -198,31 +190,8 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
{
return coeff(array<Index, 2>(i0, i1));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
{
return coeff(array<Index, 3>(i0, i1, i2));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
{
return coeff(array<Index, 4>(i0, i1, i2, i3));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
{
return coeff(array<Index, 5>(i0, i1, i2, i3, i4));
}
#endif
// custom indices
#ifdef EIGEN_HAS_SFINAE
template<typename CustomIndices,
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
>
@@ -230,7 +199,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
return coeff(internal::customIndices2Array<Index,NumIndices>(indices));
}
#endif
// normal indices
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
@@ -257,7 +225,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
return coeff(index);
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
{
@@ -265,28 +232,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
{
return coeffRef(array<Index, 2>(i0, i1));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
{
return coeffRef(array<Index, 3>(i0, i1, i2));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
{
return coeffRef(array<Index, 4>(i0, i1, i2, i3));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
{
return coeffRef(array<Index, 5>(i0, i1, i2, i3, i4));
}
#endif
// normal indices
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
@@ -295,7 +240,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
}
// custom indices
#ifdef EIGEN_HAS_SFINAE
template<typename CustomIndices,
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
>
@@ -303,7 +247,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices));
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index)
{
@@ -332,11 +275,10 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Tensor(const Self& other)
: m_storage(other.m_storage)
: Base(other), m_storage(other.m_storage)
{
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions)
: m_storage(firstDimension, otherDimensions...)
@@ -344,33 +286,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
#else
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1)
: m_storage(dim1, array<Index, 1>(dim1))
{
EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2)
: m_storage(dim1*dim2, array<Index, 2>(dim1, dim2))
{
EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3)
: m_storage(dim1*dim2*dim3, array<Index, 3>(dim1, dim2, dim3))
{
EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4)
: m_storage(dim1*dim2*dim3*dim4, array<Index, 4>(dim1, dim2, dim3, dim4))
{
EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5)
: m_storage(dim1*dim2*dim3*dim4*dim5, array<Index, 5>(dim1, dim2, dim3, dim4, dim5))
{
EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
#endif
/** Normal Dimension */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array<Index, NumIndices>& dimensions)
@@ -399,7 +314,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
}
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Tensor(Self&& other)
: m_storage(std::move(other.m_storage))
@@ -411,7 +325,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
m_storage = std::move(other.m_storage);
return *this;
}
#endif
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other)
@@ -433,7 +346,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
return *this;
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
void resize(Index firstDimension, IndexTypes... otherDimensions)
{
@@ -441,7 +353,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}});
}
#endif
/** Normal Dimension */
EIGEN_DEVICE_FUNC void resize(const array<Index, NumIndices>& dimensions)
@@ -477,7 +388,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
// Nothing to do: rank 0 tensors have fixed size
}
#ifdef EIGEN_HAS_INDEX_LIST
template <typename FirstType, typename... OtherTypes>
EIGEN_DEVICE_FUNC
void resize(const Eigen::IndexList<FirstType, OtherTypes...>& dimensions) {
@@ -487,10 +397,8 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
}
resize(dims);
}
#endif
/** Custom Dimension */
#ifdef EIGEN_HAS_SFINAE
template<typename CustomDimension,
EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomDimension>::value) )
>
@@ -498,7 +406,6 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
resize(internal::customIndices2Array<Index,NumIndices>(dimensions));
}
#endif
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::ptrdiff_t... Indices>
@@ -522,6 +429,10 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
}
#endif
#ifdef EIGEN_TENSOR_PLUGIN
#include EIGEN_TENSOR_PLUGIN
#endif
protected:
bool checkIndexRange(const array<Index, NumIndices>& indices) const

View File

@@ -11,60 +11,62 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
/** \class TensorIndexTuple
/** \class TensorIndexPair
* \ingroup CXX11_Tensor_Module
*
* \brief Tensor + Index Tuple class.
* \brief Tensor + Index Pair class.
*
*
*/
template<typename XprType>
struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType>
struct traits<TensorIndexPairOp<XprType> > : public traits<XprType>
{
typedef traits<XprType> XprTraits;
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef Tuple<Index, typename XprTraits::Scalar> Scalar;
typedef Pair<Index, typename XprTraits::Scalar> Scalar;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
};
template<typename XprType>
struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense>
struct eval<TensorIndexPairOp<XprType>, Eigen::Dense>
{
typedef const TensorIndexTupleOp<XprType>EIGEN_DEVICE_REF type;
typedef const TensorIndexPairOp<XprType>EIGEN_DEVICE_REF type;
};
template<typename XprType>
struct nested<TensorIndexTupleOp<XprType>, 1,
typename eval<TensorIndexTupleOp<XprType> >::type>
struct nested<TensorIndexPairOp<XprType>, 1,
typename eval<TensorIndexPairOp<XprType> >::type>
{
typedef TensorIndexTupleOp<XprType> type;
typedef TensorIndexPairOp<XprType> type;
};
} // end namespace internal
template<typename XprType>
class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors>
class TensorIndexPairOp : public TensorBase<TensorIndexPairOp<XprType>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar;
typedef typename Eigen::internal::traits<TensorIndexPairOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested;
typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index;
typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename Eigen::internal::nested<TensorIndexPairOp>::type Nested;
typedef typename Eigen::internal::traits<TensorIndexPairOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorIndexPairOp>::Index Index;
typedef Pair<Index, typename XprType::CoeffReturnType> CoeffReturnType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexPairOp(const XprType& expr)
: m_xpr(expr) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -73,15 +75,15 @@ class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOn
// Eval as rvalue
template<typename ArgType, typename Device>
struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
struct TensorEvaluator<const TensorIndexPairOp<ArgType>, Device>
{
typedef TensorIndexTupleOp<ArgType> XprType;
typedef TensorIndexPairOp<ArgType> XprType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
static const int NumDims = internal::array_size<Dimensions>::value;
static constexpr int NumDims = internal::array_size<Dimensions>::value;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
@@ -90,10 +92,10 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlock;
@@ -138,59 +140,59 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
namespace internal {
/** \class TensorTupleIndex
/** \class TensorPairIndex
* \ingroup CXX11_Tensor_Module
*
* \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>.
* \brief Converts to Tensor<Pair<Index, Scalar> > and reduces to Tensor<Index>.
*
*/
template<typename ReduceOp, typename Dims, typename XprType>
struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
struct traits<TensorPairReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
{
typedef traits<XprType> XprTraits;
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef Index Scalar;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
static constexpr int Layout = XprTraits::Layout;
};
template<typename ReduceOp, typename Dims, typename XprType>
struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
struct eval<TensorPairReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
{
typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>EIGEN_DEVICE_REF type;
typedef const TensorPairReducerOp<ReduceOp, Dims, XprType>EIGEN_DEVICE_REF type;
};
template<typename ReduceOp, typename Dims, typename XprType>
struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1,
typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type>
struct nested<TensorPairReducerOp<ReduceOp, Dims, XprType>, 1,
typename eval<TensorPairReducerOp<ReduceOp, Dims, XprType> >::type>
{
typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type;
typedef TensorPairReducerOp<ReduceOp, Dims, XprType> type;
};
} // end namespace internal
template<typename ReduceOp, typename Dims, typename XprType>
class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors>
class TensorPairReducerOp : public TensorBase<TensorPairReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar;
typedef typename Eigen::internal::traits<TensorPairReducerOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested;
typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index;
typedef typename Eigen::internal::nested<TensorPairReducerOp>::type Nested;
typedef typename Eigen::internal::traits<TensorPairReducerOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorPairReducerOp>::Index Index;
typedef Index CoeffReturnType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPairReducerOp(const XprType& expr,
const ReduceOp& reduce_op,
const Index return_dim,
const Dims& reduce_dims)
: m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_DEVICE_FUNC
@@ -211,38 +213,37 @@ class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Di
// Eval as rvalue
template<typename ReduceOp, typename Dims, typename ArgType, typename Device>
struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
struct TensorEvaluator<const TensorPairReducerOp<ReduceOp, Dims, ArgType>, Device>
{
typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType;
typedef TensorPairReducerOp<ReduceOp, Dims, ArgType> XprType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType TupleType;
typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions;
typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions;
static const int NumDims = internal::array_size<InputDimensions>::value;
typedef typename TensorIndexPairOp<ArgType>::CoeffReturnType PairType;
typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexPairOp<ArgType> >, Device>::Dimensions Dimensions;
typedef typename TensorEvaluator<const TensorIndexPairOp<ArgType> , Device>::Dimensions InputDimensions;
static constexpr int NumDims = internal::array_size<InputDimensions>::value;
typedef array<Index, NumDims> StrideDims;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
typedef StorageMemory<TupleType, Device> TupleStorageMem;
typedef StorageMemory<PairType, Device> PairStorageMem;
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
static constexpr int Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexPairOp<ArgType>>, Device>::Layout;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_orig_impl(op.expression(), device),
m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
m_impl(op.expression().index_pairs().reduce(op.reduce_dims(), op.reduce_op()), device),
m_return_dim(op.return_dim())
{
gen_strides(m_orig_impl.dimensions(), m_strides);
@@ -272,7 +273,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
const TupleType v = m_impl.coeff(index);
const PairType v = m_impl.coeff(index);
return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div;
}
@@ -316,8 +317,8 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
}
protected:
TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl;
TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl;
TensorEvaluator<const TensorIndexPairOp<ArgType>, Device> m_orig_impl;
TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexPairOp<ArgType> >, Device> m_impl;
const Index m_return_dim;
StrideDims m_strides;
Index m_stride_mod;

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorAssign
@@ -30,10 +32,10 @@ struct traits<TensorAssignOp<LhsXprType, RhsXprType> >
typename traits<RhsXprType>::Index>::type Index;
typedef typename LhsXprType::Nested LhsNested;
typedef typename RhsXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions;
static const int Layout = internal::traits<LhsXprType>::Layout;
typedef std::remove_reference_t<LhsNested> LhsNested_;
typedef std::remove_reference_t<RhsNested> RhsNested_;
static constexpr std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions;
static constexpr int Layout = internal::traits<LhsXprType>::Layout;
typedef typename traits<LhsXprType>::PointerType PointerType;
enum {
@@ -68,23 +70,23 @@ class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType>
typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index;
static const int NumDims = Eigen::internal::traits<TensorAssignOp>::NumDimensions;
static constexpr int NumDims = Eigen::internal::traits<TensorAssignOp>::NumDimensions;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs)
: m_lhs_xpr(lhs), m_rhs_xpr(rhs) {}
/** \returns the nested expressions */
EIGEN_DEVICE_FUNC
typename internal::remove_all<typename LhsXprType::Nested>::type&
lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); }
internal::remove_all_t<typename LhsXprType::Nested>&
lhsExpression() const { return *((internal::remove_all_t<typename LhsXprType::Nested>*)&m_lhs_xpr); }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
const internal::remove_all_t<typename RhsXprType::Nested>&
rhsExpression() const { return m_rhs_xpr; }
protected:
typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr;
const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr;
internal::remove_all_t<typename LhsXprType::Nested>& m_lhs_xpr;
const internal::remove_all_t<typename RhsXprType::Nested>& m_rhs_xpr;
};
@@ -100,8 +102,9 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static const int NumDims = XprType::NumDims;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int NumDims = XprType::NumDims;
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
enum {
IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
@@ -112,7 +115,6 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
int(TensorEvaluator<RightArgType, Device>::BlockAccess),
PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
};

View File

@@ -12,6 +12,8 @@
// clang-format off
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorBase
@@ -32,8 +34,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
typedef internal::traits<Derived> DerivedTraits;
typedef typename DerivedTraits::Scalar Scalar;
typedef typename DerivedTraits::Index Index;
typedef typename internal::remove_const<Scalar>::type CoeffReturnType;
static const int NumDimensions = DerivedTraits::NumDimensions;
typedef std::remove_const_t<Scalar> CoeffReturnType;
static constexpr int NumDimensions = DerivedTraits::NumDimensions;
// Generic nullary operation support.
template <typename CustomNullaryOp> EIGEN_DEVICE_FUNC
@@ -309,6 +311,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
return unaryExpr(internal::scalar_abs_op<Scalar>());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived>
arg() const {
return unaryExpr(internal::scalar_arg_op<Scalar>());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_clamp_op<Scalar>, const Derived>
clip(Scalar min, Scalar max) const {
@@ -316,17 +324,19 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const typename internal::conditional<NumTraits<CoeffReturnType>::IsComplex,
TensorCwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
Derived>::type
EIGEN_STRONG_INLINE const std::conditional_t<NumTraits<CoeffReturnType>::IsComplex,
TensorCwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
Derived>
conjugate() const {
return choose(Cond<NumTraits<CoeffReturnType>::IsComplex>(), unaryExpr(internal::scalar_conjugate_op<Scalar>()), derived());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >, const Derived>
pow(Scalar exponent) const {
return unaryExpr(internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >(exponent));
template<typename ScalarExponent>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::enable_if_t<internal::is_arithmetic<typename NumTraits<ScalarExponent>::Real>::value,
TensorCwiseUnaryOp<internal::scalar_unary_pow_op<Scalar, ScalarExponent>, const Derived>>
pow(ScalarExponent exponent) const
{
return unaryExpr(internal::scalar_unary_pow_op<Scalar, ScalarExponent>(exponent));
}
EIGEN_DEVICE_FUNC
@@ -417,9 +427,9 @@ class TensorBase<Derived, ReadOnlyAccessors>
template<typename NewType>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const typename internal::conditional<internal::is_same<NewType, CoeffReturnType>::value,
Derived,
TensorConversionOp<NewType, const Derived> >::type
EIGEN_STRONG_INLINE const std::conditional_t<internal::is_same<NewType, CoeffReturnType>::value,
Derived,
TensorConversionOp<NewType, const Derived> >
cast() const {
return choose(Cond<internal::is_same<NewType, CoeffReturnType>::value>(), derived(), TensorConversionOp<NewType, const Derived>(derived()));
}
@@ -513,34 +523,34 @@ class TensorBase<Derived, ReadOnlyAccessors>
// Comparisons and tests.
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
operator<(const OtherDerived& other) const {
operator<(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
operator<=(const OtherDerived& other) const {
operator<=(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
operator>(const OtherDerived& other) const {
operator>(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
operator>=(const OtherDerived& other) const {
operator>=(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
operator==(const OtherDerived& other) const {
operator==(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
operator!=(const OtherDerived& other) const {
operator!=(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>());
}
@@ -715,81 +725,81 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<internal::AndReducer, const Dims, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
const TensorReductionOp<internal::AndReducer, const Dims, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
all(const Dims& dims) const {
return cast<bool>().reduce(dims, internal::AndReducer());
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
all() const {
DimensionList<Index, NumDimensions> in_dims;
return cast<bool>().reduce(in_dims, internal::AndReducer());
}
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<internal::OrReducer, const Dims, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
const TensorReductionOp<internal::OrReducer, const Dims, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
any(const Dims& dims) const {
return cast<bool>().reduce(dims, internal::OrReducer());
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const typename internal::conditional<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> >::type >
const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const std::conditional_t<internal::is_same<bool, CoeffReturnType>::value, Derived, TensorConversionOp<bool, const Derived> > >
any() const {
DimensionList<Index, NumDimensions> in_dims;
return cast<bool>().reduce(in_dims, internal::OrReducer());
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorTupleReducerOp<
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
const TensorPairReducerOp<
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, NumDimensions>, const Derived>
argmax() const {
array<Index, NumDimensions> in_dims;
for (Index d = 0; d < NumDimensions; ++d) in_dims[d] = d;
return TensorTupleReducerOp<
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
return TensorPairReducerOp<
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, NumDimensions>,
const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
const Derived>(derived(), internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >(), -1, in_dims);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorTupleReducerOp<
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
const TensorPairReducerOp<
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, NumDimensions>, const Derived>
argmin() const {
array<Index, NumDimensions> in_dims;
for (Index d = 0; d < NumDimensions; ++d) in_dims[d] = d;
return TensorTupleReducerOp<
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
return TensorPairReducerOp<
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, NumDimensions>,
const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
const Derived>(derived(), internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >(), -1, in_dims);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorTupleReducerOp<
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
const TensorPairReducerOp<
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, 1>, const Derived>
argmax(const Index return_dim) const {
array<Index, 1> in_dims;
in_dims[0] = return_dim;
return TensorTupleReducerOp<
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
return TensorPairReducerOp<
internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, 1>,
const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
const Derived>(derived(), internal::ArgMaxPairReducer<Pair<Index, CoeffReturnType> >(), return_dim, in_dims);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorTupleReducerOp<
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
const TensorPairReducerOp<
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, 1>, const Derived>
argmin(const Index return_dim) const {
array<Index, 1> in_dims;
in_dims[0] = return_dim;
return TensorTupleReducerOp<
internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
return TensorPairReducerOp<
internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >,
const array<Index, 1>,
const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
const Derived>(derived(), internal::ArgMinPairReducer<Pair<Index, CoeffReturnType> >(), return_dim, in_dims);
}
template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -935,11 +945,11 @@ class TensorBase<Derived, ReadOnlyAccessors>
return TensorInflationOp<const Strides, const Derived>(derived(), strides);
}
// Returns a tensor containing index/value tuples
// Returns a tensor containing index/value pairs
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorIndexTupleOp<const Derived>
index_tuples() const {
return TensorIndexTupleOp<const Derived>(derived());
const TensorIndexPairOp<const Derived>
index_pairs() const {
return TensorIndexPairOp<const Derived>(derived());
}
// Support for custom unary and binary operations
@@ -960,6 +970,15 @@ class TensorBase<Derived, ReadOnlyAccessors>
return TensorForcedEvalOp<const Derived>(derived());
}
// Returns a formatted tensor ready for printing to a stream
inline const TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions> format(const TensorIOFormat& fmt) const {
return TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions>(derived(), fmt);
}
#ifdef EIGEN_READONLY_TENSORBASE_PLUGIN
#include EIGEN_READONLY_TENSORBASE_PLUGIN
#endif
protected:
template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
@@ -977,7 +996,7 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
typedef typename DerivedTraits::Scalar Scalar;
typedef typename DerivedTraits::Index Index;
typedef Scalar CoeffReturnType;
static const int NumDimensions = DerivedTraits::NumDimensions;
static constexpr int NumDimensions = DerivedTraits::NumDimensions;
template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
@@ -1001,7 +1020,6 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
return derived() = this->template random<RandomGenerator>();
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& setValues(
const typename internal::Initializer<Derived, NumDimensions>::InitList& vals) {
@@ -1009,7 +1027,6 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
internal::initialize_tensor<Derived, NumDimensions>(eval, vals);
return derived();
}
#endif // EIGEN_HAS_VARIADIC_TEMPLATES
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const OtherDerived& other) {
@@ -1152,6 +1169,10 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
return TensorAsyncDevice<Derived, DeviceType, DoneCallback>(dev, derived(), std::move(done));
}
#ifdef EIGEN_TENSORBASE_PLUGIN
#include EIGEN_TENSORBASE_PLUGIN
#endif
protected:
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TensorBase)
EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorBase)

View File

@@ -8,6 +8,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -242,7 +244,7 @@ class TensorBlockDescriptor {
const DestinationBufferKind& kind() const { return m_kind; }
private:
friend class TensorBlockDescriptor;
friend class TensorBlockDescriptor<NumDims, IndexType>;
DestinationBuffer() : m_data(NULL), m_data_type_size(0), m_kind(kEmpty) {}
@@ -706,7 +708,7 @@ class TensorMaterializedBlock {
}
private:
friend class TensorMaterializedBlock;
friend class TensorMaterializedBlock<Scalar, NumDims, Layout, IndexType>;
Storage(Scalar* data, const Dimensions& dimensions,
const Dimensions& strides, bool materialized_in_output,
@@ -833,14 +835,14 @@ class TensorMaterializedBlock {
template <typename UnaryOp, typename ArgTensorBlock>
class TensorCwiseUnaryBlock {
static const bool NoArgBlockAccess =
static constexpr bool NoArgBlockAccess =
internal::is_void<typename ArgTensorBlock::XprType>::value;
public:
typedef typename conditional<
typedef std::conditional_t<
NoArgBlockAccess, void,
TensorCwiseUnaryOp<UnaryOp, const typename ArgTensorBlock::XprType> >::
type XprType;
TensorCwiseUnaryOp<UnaryOp, const typename ArgTensorBlock::XprType> >
XprType;
typedef typename XprScalar<XprType>::type Scalar;
@@ -864,15 +866,15 @@ class TensorCwiseUnaryBlock {
template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock>
class TensorCwiseBinaryBlock {
static const bool NoArgBlockAccess =
static constexpr bool NoArgBlockAccess =
internal::is_void<typename LhsTensorBlock::XprType>::value ||
internal::is_void<typename RhsTensorBlock::XprType>::value;
public:
typedef typename conditional<
typedef std::conditional_t<
NoArgBlockAccess, void,
TensorCwiseBinaryOp<BinaryOp, const typename LhsTensorBlock::XprType,
const typename RhsTensorBlock::XprType> >::type
const typename RhsTensorBlock::XprType> >
XprType;
typedef typename XprScalar<XprType>::type Scalar;
@@ -911,12 +913,12 @@ class TensorCwiseBinaryBlock {
template <typename BlockFactory, typename ArgTensorBlock>
class TensorUnaryExprBlock {
typedef typename ArgTensorBlock::XprType ArgXprType;
static const bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
static constexpr bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
public:
typedef typename conditional<
typedef std::conditional_t<
NoArgBlockAccess, void,
typename BlockFactory::template XprType<ArgXprType>::type>::type XprType;
typename BlockFactory::template XprType<ArgXprType>::type> XprType;
typedef typename XprScalar<XprType>::type Scalar;
@@ -945,15 +947,15 @@ class TensorTernaryExprBlock {
typedef typename Arg2TensorBlock::XprType Arg2XprType;
typedef typename Arg3TensorBlock::XprType Arg3XprType;
static const bool NoArgBlockAccess = internal::is_void<Arg1XprType>::value ||
internal::is_void<Arg2XprType>::value ||
internal::is_void<Arg3XprType>::value;
static constexpr bool NoArgBlockAccess = internal::is_void<Arg1XprType>::value ||
internal::is_void<Arg2XprType>::value ||
internal::is_void<Arg3XprType>::value;
public:
typedef typename conditional<
typedef std::conditional_t<
NoArgBlockAccess, void,
typename BlockFactory::template XprType<Arg1XprType, Arg2XprType,
Arg3XprType>::type>::type XprType;
Arg3XprType>::type> XprType;
typedef typename XprScalar<XprType>::type Scalar;
@@ -1141,7 +1143,7 @@ class StridedLinearBufferCopy {
template <typename Scalar, typename IndexType, int NumDims, int Layout>
class TensorBlockIO {
static const bool IsColMajor = (Layout == ColMajor);
static constexpr bool IsColMajor = (Layout == ColMajor);
typedef StridedLinearBufferCopy<Scalar, IndexType> LinCopy;

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorBroadcasting
@@ -28,9 +30,9 @@ struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -83,7 +85,7 @@ class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, X
const Broadcast& broadcast() const { return m_broadcast; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -98,14 +100,14 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
{
typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator won't be standard layout;
bool isCopy, nByOne, oneByN;
public:
typedef StorageMemory<CoeffReturnType, Device> Storage;
@@ -116,18 +118,18 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
// We do block based broadcasting using a trick with 2x tensor rank and 0
// strides. See block method implementation for details.
typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
@@ -144,7 +146,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
{
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into a a N-D
// and store the result in a scalar. Instead one should reshape the scalar into a N-D
// tensor with N >= 1 of 1 element first and then broadcast.
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
const InputDimensions& input_dims = m_impl.dimensions();
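// Usage sketch (assumes the public unsupported/CXX11/Tensor API; not part of
// this commit): to "broadcast a scalar", first reshape the rank-0 tensor into a
// rank-1 tensor of one element, exactly as the comment above prescribes:
//
//   Eigen::Tensor<float, 0> s;
//   s() = 3.0f;
//   Eigen::array<Eigen::Index, 1> one = {{1}};
//   Eigen::array<Eigen::Index, 1> times = {{5}};
//   Eigen::Tensor<float, 1> r = s.reshape(one).broadcast(times);  // [3, 3, 3, 3, 3]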
@@ -229,7 +231,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
{
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
return m_impl.coeff(0);
}
@@ -322,7 +324,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
{
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
return internal::pset1<PacketReturnType>(m_impl.coeff(0));
}
@@ -368,10 +370,9 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne
(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
Index startDim, endDim;
Index inputIndex, outputOffset, batchedIndex;
@@ -410,25 +411,23 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
// Consider the flattened tensor [v0, ..., vN],
// Concatenates m_broadcast[dim] copies,
// [v0, ..., vN, v0, ..., vN, ... ]
// with dim == NumDims - 1 for col-major, dim == 0 for row-major.
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
Index dim, inputIndex;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
dim = NumDims - 1;
} else {
dim = 0;
}
inputIndex = index % m_inputStrides[dim];
if (inputIndex + PacketSize <= m_inputStrides[dim]) {
// Size of flattened tensor.
const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
m_inputStrides[NumDims - 1] : m_inputStrides[0];
Index inputIndex = index % M;
if (inputIndex + PacketSize <= M) {
return m_impl.template packet<Unaligned>(inputIndex);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
if (inputIndex > m_inputStrides[dim]-1) {
if (inputIndex > M - 1) {
inputIndex = 0;
}
values[i] = m_impl.coeff(inputIndex++);
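// Worked example (not from the commit): for a flattened input [v0, v1, v2]
// (M == 3) concatenated m_broadcast[dim] times, the output reads
// [v0, v1, v2, v0, v1, v2, ...]; output index 7 therefore maps to
// inputIndex = 7 % 3 == 1 (v1), which is what `index % M` above computes.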
@@ -440,32 +439,29 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// Consider the flattened tensor [v0, ..., vN],
// Interleaves m_broadcast[dim] copies,
// [v0, v0, ..., v1, v1, ..., vN, vN, ... ]
// with dim == 0 for col-major, dim == NumDims - 1 for row-major.
eigen_assert(index + PacketSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
Index dim, inputIndex, outputOffset;
const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
m_broadcast[0] : m_broadcast[NumDims - 1];
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
dim = 1;
} else {
dim = NumDims - 2;
}
inputIndex = index / m_outputStrides[dim];
outputOffset = index % m_outputStrides[dim];
if (outputOffset + PacketSize <= m_outputStrides[dim]) {
values[0] = m_impl.coeff(inputIndex);
return internal::pload1<PacketReturnType>(values);
Index inputIndex = index / M;
Index outputOffset = index % M;
if (outputOffset + PacketSize <= M) {
return internal::pset1<PacketReturnType>(m_impl.coeff(inputIndex));
} else {
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
if (outputOffset + cur < m_outputStrides[dim]) {
for (int i = 0; i < PacketSize; ++i) {
if (outputOffset < M) {
values[i] = m_impl.coeff(inputIndex);
++outputOffset;
} else {
values[i] = m_impl.coeff(++inputIndex);
outputOffset = 0;
cur = 0;
outputOffset = 1; // Next offset.
}
}
return internal::pload<PacketReturnType>(values);
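// Worked example (not from the commit): for an input [v0, v1, v2] with each
// coefficient repeated M == 4 times, the output reads
// [v0, v0, v0, v0, v1, v1, v1, v1, v2, v2, v2, v2]; output index 7 maps to
// inputIndex = 7 / 4 == 1 (v1) with outputOffset = 7 % 4 == 3, matching the
// division/modulo above. When a whole packet falls inside one run of repeats,
// pset1 simply replicates that single coefficient across the packet lanes.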
@@ -477,7 +473,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index originalIndex = index;
@@ -517,7 +512,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
return m_impl.template packet<Unaligned>(inputIndex);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
values[0] = m_impl.coeff(inputIndex);
EIGEN_UNROLL_LOOP
for (int i = 1; i < PacketSize; ++i) {
@@ -535,7 +530,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index originalIndex = index;
@@ -575,7 +569,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
return m_impl.template packet<Unaligned>(inputIndex);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
values[0] = m_impl.coeff(inputIndex);
EIGEN_UNROLL_LOOP
for (int i = 1; i < PacketSize; ++i) {
@@ -701,7 +695,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
#endif
private:
static const bool IsColMajor =
static constexpr bool IsColMajor =
static_cast<int>(Layout) == static_cast<int>(ColMajor);
// We will build a general case block broadcasting on top of broadcasting
@@ -1080,7 +1074,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
protected:
const Device EIGEN_DEVICE_REF m_device;
const typename internal::remove_reference<Broadcast>::type m_broadcast;
const std::remove_reference_t<Broadcast> m_broadcast;
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_inputStrides;

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorKChippingReshaping
@@ -29,9 +31,9 @@ struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions - 1;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions - 1;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -98,7 +100,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
const Index dim() const { return m_dim.actualDim(); }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)
@@ -115,31 +117,31 @@ template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
typedef TensorChippingOp<DimId, ArgType> XprType;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static const int NumDims = NumInputDims-1;
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = NumInputDims-1;
typedef typename XprType::Index Index;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
// Alignment can't be guaranteed at compile time since it depends on the
// slice offsets.
IsAligned = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
// Chipping of outer-most dimension is a trivial operation, because we can
// read and write directly from the underlying tensor using single offset.
IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
(static_cast<int>(Layout) == RowMajor && DimId == 0),
IsOuterChipping = (Layout == ColMajor && DimId == NumInputDims - 1) ||
(Layout == RowMajor && DimId == 0),
// Chipping inner-most dimension.
IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
(static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
IsInnerChipping = (Layout == ColMajor && DimId == 0) ||
(Layout == RowMajor && DimId == NumInputDims - 1),
// Prefer block access if the underlying expression prefers it, otherwise
// only if chipping is not trivial.
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess ||
@@ -148,7 +150,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
RawAccess = false
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
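// Usage sketch (assumes the public Tensor API; not part of this commit):
// chipping fixes one dimension at a given offset and drops it, so the result
// has rank NumInputDims - 1. Chipping the outer-most dimension of a
// column-major tensor is the "trivial" case mentioned above, because the slice
// is one contiguous range of the underlying buffer:
//
//   Eigen::Tensor<float, 3> t(4, 5, 6);           // ColMajor by default
//   Eigen::Tensor<float, 2> page = t.chip(2, 2);  // 4x5 slice at offset 2 * 4 * 5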
@@ -217,14 +219,13 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
if (isInnerChipping()) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(m_stride == 1);
Index inputIndex = index * m_inputStride + m_inputOffset;
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = m_impl.coeff(inputIndex);
@@ -244,7 +245,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
return m_impl.template packet<LoadMode>(inputIndex);
} else {
// Cross the stride boundary. Fallback to slow path.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index);
@@ -412,14 +413,14 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
{
typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
typedef TensorChippingOp<DimId, ArgType> XprType;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static const int NumDims = NumInputDims-1;
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = NumInputDims-1;
typedef typename XprType::Index Index;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
@@ -445,12 +446,10 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
if (this->isInnerChipping()) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(this->m_stride == 1);
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
EIGEN_UNROLL_LOOP
@@ -470,7 +469,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
this->m_impl.template writePacket<StoreMode>(inputIndex, x);
} else {
// Cross stride boundary. Fallback to slow path.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
@@ -484,7 +483,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
template <typename TensorBlock>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
const TensorBlockDesc& desc, const TensorBlock& block) {
assert(this->m_impl.data() != NULL);
eigen_assert(this->m_impl.data() != NULL);
const Index chip_dim = this->m_dim.actualDim();

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorConcatenationOp
@@ -32,13 +34,13 @@ struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >
typename traits<RhsXprType>::Index>::type Index;
typedef typename LhsXprType::Nested LhsNested;
typedef typename RhsXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
static const int NumDimensions = traits<LhsXprType>::NumDimensions;
static const int Layout = traits<LhsXprType>::Layout;
typedef std::remove_reference_t<LhsNested> LhsNested_;
typedef std::remove_reference_t<RhsNested> RhsNested_;
static constexpr int NumDimensions = traits<LhsXprType>::NumDimensions;
static constexpr int Layout = traits<LhsXprType>::Layout;
enum { Flags = 0 };
typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType>::type PointerType;
typedef std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType> PointerType;
};
template<typename Axis, typename LhsXprType, typename RhsXprType>
@@ -73,11 +75,11 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
: m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename LhsXprType::Nested>::type&
const internal::remove_all_t<typename LhsXprType::Nested>&
lhsExpression() const { return m_lhs_xpr; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
const internal::remove_all_t<typename RhsXprType::Nested>&
rhsExpression() const { return m_rhs_xpr; }
EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; }
@@ -96,14 +98,15 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
{
typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
static constexpr int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &&
@@ -111,7 +114,6 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess ||
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = false
};
@@ -303,6 +305,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
typedef TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> Base;
typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
typedef typename Base::Dimensions Dimensions;
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &&
@@ -310,7 +313,6 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess ||
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = false
};

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorContraction
@@ -25,8 +27,8 @@ template<typename Dimensions, typename LhsXprType, typename RhsXprType, typename
struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type,
typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar;
typedef typename gebp_traits<std::remove_const_t<typename LhsXprType::Scalar>,
std::remove_const_t<typename RhsXprType::Scalar>>::ResScalar Scalar;
typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
typename traits<RhsXprType>::StorageKind>::ret StorageKind;
@@ -34,15 +36,15 @@ struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKern
typename traits<RhsXprType>::Index>::type Index;
typedef typename LhsXprType::Nested LhsNested;
typedef typename RhsXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
typedef std::remove_reference_t<LhsNested> LhsNested_;
typedef std::remove_reference_t<RhsNested> RhsNested_;
// From NumDims below.
static const int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value;
static const int Layout = traits<LhsXprType>::Layout;
typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType,
typename traits<RhsXprType>::PointerType>::type
static constexpr int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value;
static constexpr int Layout = traits<LhsXprType>::Layout;
typedef std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType,
typename traits<RhsXprType>::PointerType>
PointerType;
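// Worked example (not from the commit): each contracted index pair removes one
// dimension from each operand, hence
//   NumDimensions = rank(lhs) + rank(rhs) - 2 * (number of contracted pairs).
// E.g. contracting a rank-4 tensor with a rank-3 tensor over two index pairs
// yields a rank 4 + 3 - 2*2 = 3 result.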
enum {
@@ -71,7 +73,7 @@ struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_,
typedef Device_ Device;
// From NumDims below.
static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value;
static constexpr int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value;
};
// Helper class to allocate and deallocate temporary memory for packed buffers.
@@ -343,11 +345,11 @@ class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXp
/** \returns the nested expressions */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename LhsXprType::Nested>::type&
const internal::remove_all_t<typename LhsXprType::Nested>&
lhsExpression() const { return m_lhs_xpr; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
const internal::remove_all_t<typename RhsXprType::Nested>&
rhsExpression() const { return m_rhs_xpr; }
EIGEN_DEVICE_FUNC
@@ -371,19 +373,19 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
typedef typename internal::traits<Derived>::Device Device;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
enum {
IsAligned = true,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = true
};
@@ -396,20 +398,20 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
// will pretend B is LHS and A is RHS.
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
typedef std::conditional_t<
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
typedef std::conditional_t<
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluatorType;
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluatorType;
static const int LDims =
static constexpr int LDims =
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
static const int RDims =
static constexpr int RDims =
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
static const int ContractDims = internal::array_size<Indices>::value;
static const int NumDims = LDims + RDims - 2 * ContractDims;
static constexpr int ContractDims = internal::array_size<Indices>::value;
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
typedef array<Index, ContractDims> contract_t;
typedef array<Index, LDims - ContractDims> left_nocontract_t;
@@ -733,8 +735,8 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
const Index rows = m_i_size;
const Index cols = m_k_size;
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size;
@@ -762,7 +764,7 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
const Index resIncr(1);
// zero out the result buffer (which must be of size at least rows * sizeof(Scalar))
m_device.memset(buffer, 0, rows * sizeof(Scalar));
m_device.fill(buffer, buffer + rows, Scalar(0));
internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run(
rows, cols, lhs, rhs,
@@ -810,8 +812,8 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
const Index n = this->m_j_size;
// define data mappers for Lhs and Rhs
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
@@ -869,7 +871,7 @@ struct TensorContractionEvaluatorBase : internal::no_assignment_operator
// If a contraction kernel does not support beta, explicitly initialize
// output buffer with zeroes.
if (!TensorContractionKernel::HasBeta) {
this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
this->m_device.fill(buffer, buffer + m * n, Scalar(0));
}
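// NOTE (assumption, not stated in the commit): memset() writes raw zero bytes,
// which only matches Scalar(0) for trivially zero-initializable scalar types;
// the device fill() assigns Scalar(0) element by element and therefore also
// stays correct for scalar types whose zero is not the all-zero byte pattern.
// A host-side equivalent of the new call is roughly:
//
//   std::fill_n(buffer, m * n, Scalar(0));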
for(Index i2=0; i2<m; i2+=mc)
@@ -976,35 +978,31 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef TensorContractionEvaluatorBase<Self> Base;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
enum {
Layout = TensorEvaluator<LeftArgType, Device>::Layout
};
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
// Most of the code is assuming that both input tensors are ColMajor. If the
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
// will pretend B is LHS and A is RHS.
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
static const int LDims =
static constexpr int LDims =
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
static const int RDims =
static constexpr int RDims =
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
static const int ContractDims = internal::array_size<Indices>::value;
static constexpr int ContractDims = internal::array_size<Indices>::value;
typedef array<Index, ContractDims> contract_t;
typedef array<Index, LDims - ContractDims> left_nocontract_t;
typedef array<Index, RDims - ContractDims> right_nocontract_t;
static const int NumDims = LDims + RDims - 2 * ContractDims;
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
// Could we use NumDimensions here?
typedef DSizes<Index, NumDims> Dimensions;
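// NOTE (illustrative, not from the commit): the LHS/RHS swap described above
// works because a RowMajor m x n matrix has the same memory layout as a
// ColMajor n x m matrix holding its transpose, and (A * B)^T = B^T * A^T;
// running the ColMajor kernel with the operands swapped therefore produces C
// directly in RowMajor layout.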

View File

@@ -11,6 +11,8 @@
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {

View File

@@ -14,6 +14,8 @@
#if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
#include "./InternalHeaderCheck.h"
namespace Eigen {
template<typename Scalar, typename Index, typename LhsMapper,
@@ -233,7 +235,7 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
} \
} \
#define writeRegToShmem(_) \
#define writeRegToShmem() \
lhs_shmem[lhs_store_idx_0] = lhs_pf0; \
rhs_shmem[rhs_store_idx_0] = rhs_pf0; \
\
@@ -1225,29 +1227,25 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef TensorContractionEvaluatorBase<Self> Base;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
enum {
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
};
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
// Most of the code is assuming that both input tensors are ColMajor. If the
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
// will pretend B is LHS and A is RHS.
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
typedef std::conditional_t<Layout == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
static const int LDims =
static constexpr int LDims =
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
static const int RDims =
static constexpr int RDims =
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
static const int ContractDims = internal::array_size<Indices>::value;
static constexpr int ContractDims = internal::array_size<Indices>::value;
typedef array<Index, LDims> left_dim_mapper_t;
typedef array<Index, RDims> right_dim_mapper_t;
@@ -1256,13 +1254,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef array<Index, LDims - ContractDims> left_nocontract_t;
typedef array<Index, RDims - ContractDims> right_nocontract_t;
static const int NumDims = LDims + RDims - 2 * ContractDims;
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
typedef DSizes<Index, NumDims> Dimensions;
// typedefs needed in evalTo
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
@@ -1370,8 +1368,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// columns in right side
const Index n = this->m_j_size;
// zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)
this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
// zero out the result buffer (which must be of size at least m * n * sizeof(Scalar))
this->m_device.fill(buffer, buffer + m * n, Scalar(0));
typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
LeftEvaluator, left_nocontract_t,

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -294,7 +296,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
template <typename PacketT,int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::enable_if<internal::unpacket_traits<PacketT>::size==packet_size,PacketT>::type
std::enable_if_t<internal::unpacket_traits<PacketT>::size==packet_size,PacketT>
load(Index i, Index j) const
{
// whole method makes column major assumption
@@ -340,7 +342,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
template <typename PacketT,int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::enable_if<internal::unpacket_traits<PacketT>::size!=packet_size,PacketT>::type
std::enable_if_t<internal::unpacket_traits<PacketT>::size!=packet_size,PacketT>
load(Index i, Index j) const
{
const Index requested_packet_size = internal::unpacket_traits<PacketT>::size;
@@ -414,6 +416,7 @@ class TensorContractionSubMapper {
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> ParentMapper;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> Self;
typedef Self LinearMapper;
typedef Self SubMapper;
enum {
// We can use direct offsets iff the parent mapper supports them and we can compute the strides.
@@ -483,6 +486,13 @@ class TensorContractionSubMapper {
return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubMapper getSubMapper(Index i, Index j) const {
if (UseDirectOffsets) {
return SubMapper(m_base_mapper, i, j);
}
return SubMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
}
template <typename PacketT, int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
EIGEN_STATIC_ASSERT((internal::is_same<PacketT, PacketT>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -529,6 +539,7 @@ class TensorContractionInputMapper
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> Base;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment, MakePointer_> SubMapper;
typedef SubMapper VectorMapper;
typedef SubMapper LinearMapper;
EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
@@ -542,6 +553,10 @@ class TensorContractionInputMapper
return SubMapper(*this, i, j);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(*this, i, j);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(*this, i, j);
}

View File

@@ -19,6 +19,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace TensorSycl {
@@ -110,7 +112,7 @@ struct TTPanelSize {
// BC : determines if supporting bank conflict is required
static EIGEN_CONSTEXPR bool BC = true;
// DoubleBuffer: determines if double buffering technique should be used (This can be disabled by
// EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device doesnot have sufficient local memory)
// EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device does not have sufficient local memory)
static EIGEN_CONSTEXPR bool DoubleBuffer =
#ifdef EIGEN_SYCL_DISABLE_DOUBLE_BUFFER
false;
@@ -156,7 +158,7 @@ enum class data_source { global_mem, local_mem, private_mem };
*/
template <bool PacketLoad, bool is_coalesced_layout, bool, typename PacketType, typename TensorMapper,
typename StorageIndex>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<PacketLoad, PacketType>::type read(
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<PacketLoad, PacketType> read(
const TensorMapper &tensorMapper, const StorageIndex &NCIndex, const StorageIndex &CIndex, const StorageIndex &ld) {
const StorageIndex row = (is_coalesced_layout) ? NCIndex : CIndex;
const StorageIndex col = (is_coalesced_layout) ? CIndex : NCIndex;
@@ -186,7 +188,7 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_
* \param CIndex: is the contracting dim index
*/
template <bool PacketLoad, bool, bool IsRhs, typename PacketType, typename TensorMapper, typename StorageIndex>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!PacketLoad, PacketType>::type read(
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!PacketLoad, PacketType> read(
const TensorMapper &tensorMapper, const StorageIndex &NCIndex, const StorageIndex &CIndex, const StorageIndex &) {
const StorageIndex row = (IsRhs) ? CIndex : NCIndex;
const StorageIndex col = (IsRhs) ? NCIndex : CIndex;
@@ -216,7 +218,7 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_
template <typename StorageIndex, StorageIndex ld, data_source dt, typename PacketType, typename DataScalar>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<dt != data_source::global_mem, void>::type
std::enable_if_t<dt != data_source::global_mem, void>
write(PacketType &packet_data, DataScalar ptr) {
EIGEN_CONSTEXPR int PacketSize = Eigen::internal::unpacket_traits<PacketType>::size;
EIGEN_UNROLL_LOOP
@@ -242,8 +244,8 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
*/
template <data_source dt, typename PacketType, typename DataScalar>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<
Eigen::internal::unpacket_traits<PacketType>::size != 1 && dt == data_source::global_mem, void>::type
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename std::enable_if_t<
Eigen::internal::unpacket_traits<PacketType>::size != 1 && dt == data_source::global_mem, void>
write(PacketType &packet_data, DataScalar *ptr) {
::Eigen::internal::pstoreu<DataScalar, PacketType>(ptr, packet_data);
}
@@ -262,8 +264,8 @@ write(PacketType &packet_data, DataScalar *ptr) {
* \param ptr: a pointer to the local memory
*/
template <data_source dt, typename PacketType, typename DataScalar>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<
Eigen::internal::unpacket_traits<PacketType>::size == 1 && dt == data_source::global_mem, void>::type
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename std::enable_if_t<
Eigen::internal::unpacket_traits<PacketType>::size == 1 && dt == data_source::global_mem, void>
write(PacketType &packet_data, DataScalar *ptr) {
*ptr = packet_data;
}
@@ -319,7 +321,7 @@ struct BlockProperties {
static EIGEN_CONSTEXPR bool packet_load = packet_load_;
typedef typename Eigen::internal::unpacket_traits<PacketType>::type OutScalar;
static EIGEN_CONSTEXPR bool is_rhs = is_rhs_;
typedef typename Eigen::internal::conditional<packet_load, PacketType, OutScalar>::type OutType;
typedef std::conditional_t<packet_load, PacketType, OutScalar> OutType;
static EIGEN_CONSTEXPR int elements_per_access = Eigen::internal::unpacket_traits<OutType>::size;
static EIGEN_CONSTEXPR bool is_coalesced_layout = !(is_transposed ^ is_rhs);
static EIGEN_CONSTEXPR int nc_stride = (is_coalesced_layout ? elements_per_access : 1);
@@ -428,7 +430,7 @@ struct ThreadProperties {
Otherwise, the result of contraction will be written in a temporary buffer. This is the case when Tall/Skinny
contraction is used. So in this case, a final reduction step is required to compute the final output.
* \tparam contraction_tp: it is an enum value representing whether the local memroy/no local memory implementation of
* \tparam contraction_tp: it is an enum value representing whether the local memory/no local memory implementation of
the algorithm to be used
*
* \param scratch: local memory containing tiles of LHS and RHS tensors for each work-group
@@ -475,8 +477,7 @@ class TensorContractionKernel {
typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> Scratch;
typedef cl::sycl::multi_ptr<OutScalar, cl::sycl::access::address_space::local_space> local_ptr;
typedef OutScalar * /*cl::sycl::multi_ptr<OutScalar, cl::sycl::access::address_space::private_space>*/ private_ptr;
typedef
typename ::Eigen::internal::conditional<contraction_tp == contraction_type::local, local_ptr, private_ptr>::type
typedef std::conditional_t<contraction_tp == contraction_type::local, local_ptr, private_ptr>
tile_ptr;
static EIGEN_CONSTEXPR StorageIndex LSDL = contraction_tp == contraction_type::local
? Properties::TileSizeDimM + Properties::BC
@@ -493,7 +494,7 @@ class TensorContractionKernel {
* the TiledMemory for both local and private memory, the MemHolder struct is used as a helper to abstract out
* the different types of memory needed when the local/no_local memory computation is called.
*
* \tparam contraction_type: it is an enum value representing whether the local memroy/no local memory implementation
* \tparam contraction_type: it is an enum value representing whether the local memory/no local memory implementation
of the algorithm to be used
* \tparam the private memory size
* \param ptr the tile memory pointer type
@@ -520,10 +521,10 @@ class TensorContractionKernel {
* \param rhs_scratch_extract : determines the RHS tile memory. It is either private or local memory based on the
* selected contraction_type.
*
* \param lhs_extract_index: determins the position of each thread on a local memory for lhs input. When private
* \param lhs_extract_index: determines the position of each thread on a local memory for lhs input. When private
* memory is used this is set to zero as this is not applicable in case of private memory.
*
* \param rhs_extract_index: determins the position of each thread on a local memory for rhs input. When private
* \param rhs_extract_index: determines the position of each thread on a local memory for rhs input. When private
* memory is used this is set to zero as this is not applicable in case of private memory.
*
* \param lhs_scratch_compute : determines the location to load for computation for lhs_local memory. This is the
@@ -542,7 +543,7 @@ class TensorContractionKernel {
template <contraction_type tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TiledMemory(const ThreadProperties<StorageIndex> &, local_ptr,
typename ::Eigen::internal::enable_if<tp == contraction_type::no_local>::type * = 0)
std::enable_if_t<tp == contraction_type::no_local> * = 0)
: lhs_scratch_extract{},
rhs_scratch_extract{},
lhs_scratch_ptr_compute(lhs_scratch_extract.ptr),
@@ -553,7 +554,7 @@ class TensorContractionKernel {
template <contraction_type tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TiledMemory(const ThreadProperties<StorageIndex> &thread_properties, local_ptr block_start_ptr,
typename ::Eigen::internal::enable_if<tp == contraction_type::local>::type * = 0)
std::enable_if_t<tp == contraction_type::local> * = 0)
: lhs_scratch_extract{block_start_ptr},
rhs_scratch_extract{lhs_scratch_extract.ptr +
((Properties::DoubleBuffer + 1) * LSDL * Properties::TileSizeDimK)},
@@ -710,7 +711,7 @@ class TensorContractionKernel {
template <typename InputBlockProperties, bool is_internal_block, typename Input, typename PrivateReg,
contraction_type contract_tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<contract_tp == contraction_type::no_local>::type
std::enable_if_t<contract_tp == contraction_type::no_local>
extract_block(const Input &inpt, PrivateReg private_ptr, const std::pair<StorageIndex, StorageIndex> &,
const StorageIndex &ncOffset, const StorageIndex cOffset) {
EIGEN_CONSTEXPR StorageIndex LocalThreadSizeNC =
@@ -783,28 +784,28 @@ class TensorContractionKernel {
template <bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<db && ctp == contraction_type::local>::type
std::enable_if_t<db && ctp == contraction_type::local>
sync_mem(const cl::sycl::nd_item<1> &, bool &db_offset) noexcept {
db_offset = !db_offset;
}
template <bool db = Properties::DoubleBuffer, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<!db && ctp == contraction_type::local>::type
std::enable_if_t<!db && ctp == contraction_type::local>
sync_mem(const cl::sycl::nd_item<1> &itemID, bool &) noexcept {
itemID.barrier(cl::sycl::access::fence_space::local_space);
}
template <contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<ctp == contraction_type::no_local>::type
std::enable_if_t<ctp == contraction_type::no_local>
sync_mem(const cl::sycl::nd_item<1> &, bool &) noexcept {
return;
}
template <bool need_sync, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<need_sync && ctp == contraction_type::no_local>::type
std::enable_if_t<need_sync && ctp == contraction_type::no_local>
sync_thread(const cl::sycl::nd_item<1> &
#ifdef EIGEN_SYCL_ARM_GPU_CACHE_OPTIMISATION
itemID
@@ -818,12 +819,12 @@ class TensorContractionKernel {
}
template <bool need_sync, contraction_type ctp = contraction_tp>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<need_sync && ctp == contraction_type::local>::type
std::enable_if_t<need_sync && ctp == contraction_type::local>
sync_thread(const cl::sycl::nd_item<1> &itemID) {
itemID.barrier(cl::sycl::access::fence_space::local_space);
}
template <bool need_sync>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!need_sync>::type sync_thread(
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!need_sync> sync_thread(
const cl::sycl::nd_item<1> &) {
return;
}
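// NOTE (illustrative sketch, not part of the commit): these overloads use the
// usual SFINAE idiom, now spelled with the std::enable_if_t alias. The return
// type std::enable_if_t<Cond> only names a type when Cond is true, so exactly
// one sync_mem / sync_thread overload survives overload resolution for a given
// (DoubleBuffer, contraction_type, need_sync) combination, e.g. (hypothetical
// helper for illustration only):
//
//   template <bool UseLocal>
//   static std::enable_if_t<UseLocal> barrier_impl();   // picked when UseLocal is true
//   template <bool UseLocal>
//   static std::enable_if_t<!UseLocal> barrier_impl();  // picked otherwise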
@@ -894,7 +895,7 @@ class TensorContractionKernel {
template <typename InputBlockProperties, bool is_internal_block, typename Input, typename Local,
contraction_type contract_tp = contraction_tp>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename ::Eigen::internal::enable_if<contract_tp == contraction_type::local>::type
std::enable_if_t<contract_tp == contraction_type::local>
extract_block(const Input &inpt, Local local_ptr, const std::pair<StorageIndex, StorageIndex>& local_index,
const StorageIndex &ncOffset, const StorageIndex cOffset) {
EIGEN_CONSTEXPR StorageIndex TileSizeDimNC =
@@ -1234,7 +1235,7 @@ struct GeneralVectorTensor {
*
* \param out_res: determines the output tensor containing the contraction result
*
* \param rng: determins the total input data size
* \param rng: determines the total input data size
*/
template <typename OutScalar, typename LhsScalar, typename RhsScalar, typename OutAccessor, typename LhsMapper,
typename RhsMapper, typename StorageIndex, bool Vectorizable>
@@ -1292,7 +1293,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self;
typedef TensorContractionEvaluatorBase<Self> Base;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef typename XprType::Index StorageIndex;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
@@ -1305,14 +1306,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
TripleDim(const StorageIndex M_, const StorageIndex N_, const StorageIndex K_) : M(M_), N(N_), K(K_) {}
};
enum {
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
};
static EIGEN_CONSTEXPR int LDims = Base::LDims;
static EIGEN_CONSTEXPR int RDims = Base::RDims;
static EIGEN_CONSTEXPR int ContractDims = Base::ContractDims;
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
static constexpr int LDims = Base::LDims;
static constexpr int RDims = Base::RDims;
static constexpr int ContractDims = Base::ContractDims;
typedef array<StorageIndex, LDims> left_dim_mapper_t;
typedef array<StorageIndex, RDims> right_dim_mapper_t;
@@ -1321,14 +1322,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef array<StorageIndex, LDims - ContractDims> left_nocontract_t;
typedef array<StorageIndex, RDims - ContractDims> right_nocontract_t;
static const int NumDims = LDims + RDims - 2 * ContractDims;
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
typedef DSizes<StorageIndex, NumDims> Dimensions;
typedef TensorEvaluator<typename Base::EvalLeftArgType, Device> LeftEvaluator;
typedef TensorEvaluator<typename Base::EvalRightArgType, Device> RightEvaluator;
typedef typename Eigen::internal::remove_const<typename LeftEvaluator::CoeffReturnType>::type LhsScalar;
typedef typename Eigen::internal::remove_const<typename RightEvaluator::CoeffReturnType>::type RhsScalar;
typedef std::remove_const_t<typename LeftEvaluator::CoeffReturnType> LhsScalar;
typedef std::remove_const_t<typename RightEvaluator::CoeffReturnType> RhsScalar;
typedef typename LeftEvaluator::Dimensions LeftDimensions;
typedef typename RightEvaluator::Dimensions RightDimensions;

View File

@@ -13,6 +13,8 @@
// evaluator for thread pool device
#ifdef EIGEN_USE_THREADS
#include "./InternalHeaderCheck.h"
namespace Eigen {
template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType>
@@ -25,29 +27,27 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef TensorContractionEvaluatorBase<Self> Base;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
enum {
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
};
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
// Most of the code is assuming that both input tensors are ColMajor. If the
// inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
// If we want to compute A * B = C, where A is LHS and B is RHS, the code
// will pretend B is LHS and A is RHS.
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
typedef typename internal::conditional<
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
typedef std::conditional_t<
static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType> EvalLeftArgType;
typedef std::conditional_t<
static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType> EvalRightArgType;
static const int LDims =
static constexpr int LDims =
internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
static const int RDims =
static constexpr int RDims =
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
static const int ContractDims = internal::array_size<Indices>::value;
static constexpr int ContractDims = internal::array_size<Indices>::value;
typedef array<Index, LDims> left_dim_mapper_t;
typedef array<Index, RDims> right_dim_mapper_t;
@@ -56,13 +56,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef array<Index, LDims - ContractDims> left_nocontract_t;
typedef array<Index, RDims - ContractDims> right_nocontract_t;
static const int NumDims = LDims + RDims - 2 * ContractDims;
static constexpr int NumDims = LDims + RDims - 2 * ContractDims;
typedef DSizes<Index, NumDims> Dimensions;
// typedefs needed in evalTo
typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
typedef std::remove_const_t<typename EvalLeftArgType::Scalar> LhsScalar;
typedef std::remove_const_t<typename EvalRightArgType::Scalar> RhsScalar;
typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
@@ -96,7 +96,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// context from the heap.
//
// (*) EvalParallelContext & EvalShardedByInnerDimContext owns all the state
// and temporary buffers, requried for executing the tensor contraction.
// and temporary buffers, required for executing the tensor contraction.
// They are responsible for cleaning it up after contraction is done.
static const bool IsEvalInSyncMode =
std::is_same<DoneCallback, NoCallback>::value;
@@ -599,7 +599,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// These variable are rolling over 3 consecutive k slices: first two we are
// actively executing + one to track completion of kernels in the second
// slice.
static const Index P = 3;
static constexpr Index P = 3;
// Handle to the allocated temporary storage for Lhs/Rhs blocks.
BlockMemHandle packed_mem_;
@@ -698,7 +698,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
!is_rhs && std::is_same<BlockType, LhsBlock>::value;
static const bool kIsRhs =
is_rhs && std::is_same<BlockType, RhsBlock>::value;
static_assert(kIsLhs || kIsRhs, "Unkown block type");
static_assert(kIsLhs || kIsRhs, "Unknown block type");
using Blocks = ThreadLocalBlocks<BlockType>;
@@ -874,7 +874,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1));
if (!parallel_pack_ && shard_by_col_) {
assert(!use_thread_local);
eigen_assert(!use_thread_local);
signal_packing(k);
} else {
signal_switch(k + 1);
@@ -895,7 +895,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
} else {
// If we can't guarantee that all kernels in `k` slice will be
// executed sequentially in current thread, it's no longer safe to use
// thread local memory in followig slices along the k dimensions.
// thread local memory in following slices along the k dimensions.
eigen_assert(k > 0);
can_use_thread_local_packed_[n].store(false,
std::memory_order_relaxed);
@@ -912,9 +912,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// On 10000x2x10000 mm zeroing can easily take half of time. Zero (bn
// x m) row. Safe to do here because all kernels that will write to
// this memory depend on completion of this task. Note: don't call
// device_.memset() here. device_.memset() blocks on thread pool
// device_.fill() here. device_.fill() blocks on thread pool
// worker thread, which can lead to underutilization and deadlocks.
memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar));
std::fill_n(buffer_ + n1 * bn_ * m_, bn(n1) * m_, Scalar(0));
}
kernel_.packRhs(&packed_rhs(n, k, n1, use_thread_local),
rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1));
@@ -927,7 +927,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
signal_kernel(m, n, k, sync, use_thread_local);
}
} else {
assert(!use_thread_local);
eigen_assert(!use_thread_local);
signal_packing(k);
}
}
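The zeroing change above (raw memset replaced by std::fill_n with Scalar(0)) deserves a note: memset writes a repeated byte, which only happens to equal Scalar(0) for plain arithmetic types, whereas std::fill_n assigns whole elements of the scalar type. A minimal host-side sketch of the two forms, assuming a plain float buffer (names here are illustrative, not from the patch):

    #include <algorithm>
    #include <cstring>
    #include <vector>

    int main() {
      std::vector<float> buffer(1024);
      // Byte-wise zeroing: only valid because float's all-zero bit pattern is 0.0f.
      std::memset(buffer.data(), 0, buffer.size() * sizeof(float));
      // Element-wise zeroing: expresses the intent in terms of the scalar type and
      // stays correct for scalars whose zero is not the all-zero bit pattern.
      std::fill_n(buffer.data(), buffer.size(), 0.0f);
      return 0;
    }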

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorConversionOp
@@ -28,9 +30,9 @@ struct traits<TensorConversionOp<TargetType, XprType> >
typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename traits<XprType>::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = traits<XprType>::NumDimensions;
static const int Layout = traits<XprType>::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = traits<XprType>::NumDimensions;
static constexpr int Layout = traits<XprType>::Layout;
enum { Flags = 0 };
typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
};
@@ -187,7 +189,7 @@ class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprT
: m_xpr(xpr) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -250,12 +252,12 @@ struct PacketConv {
typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
template <typename ArgType, typename Device>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
internal::scalar_cast_op<SrcType, TargetType> converter;
EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = converter(impl.coeff(index+i));
@@ -283,11 +285,11 @@ struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
template <typename SrcPacket, typename TargetPacket, int LoadMode>
struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
template <typename ArgType, typename Device>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
return internal::pload<TargetPacket>(values);
}
@@ -312,11 +314,11 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef TargetType Scalar;
typedef TargetType CoeffReturnType;
typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
typedef internal::remove_all_t<typename internal::traits<ArgType>::Scalar> SrcType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename PacketType<SrcType, Device>::type PacketSourceType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr bool IsSameType = internal::is_same<TargetType, SrcType>::value;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
@@ -331,11 +333,11 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
#endif
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
static const int NumDims = internal::array_size<Dimensions>::value;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
static constexpr int NumDims = internal::array_size<Dimensions>::value;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
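For context, TensorConversionOp is the expression node built by TensorBase::cast<T>(); the traits and evaluator changes above only modernize its type machinery. A minimal usage sketch on the default device (sizes and values are arbitrary):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<int, 2> a(2, 3);
      a.setConstant(7);
      // cast<float>() wraps `a` in a TensorConversionOp; the conversion happens
      // lazily when the expression is evaluated into `b`.
      Eigen::Tensor<float, 2> b = a.cast<float>();
      return 0;
    }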

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorConvolution
@@ -206,7 +208,7 @@ class IndexMapper {
}
private:
static const int NumDims = internal::array_size<InputDims>::value;
static constexpr int NumDims = internal::array_size<InputDims>::value;
array<Index, NumDims> m_inputStrides;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_gpuInputStrides;
@@ -227,12 +229,12 @@ struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
typename traits<KernelXprType>::Index>::type Index;
typedef typename InputXprType::Nested LhsNested;
typedef typename KernelXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
static const int NumDimensions = traits<InputXprType>::NumDimensions;
static const int Layout = traits<InputXprType>::Layout;
typedef typename conditional<Pointer_type_promotion<typename InputXprType::Scalar, Scalar>::val,
typename traits<InputXprType>::PointerType, typename traits<KernelXprType>::PointerType>::type PointerType;
typedef std::remove_reference_t<LhsNested> LhsNested_;
typedef std::remove_reference_t<RhsNested> RhsNested_;
static constexpr int NumDimensions = traits<InputXprType>::NumDimensions;
static constexpr int Layout = traits<InputXprType>::Layout;
typedef std::conditional_t<Pointer_type_promotion<typename InputXprType::Scalar, Scalar>::val,
typename traits<InputXprType>::PointerType, typename traits<KernelXprType>::PointerType> PointerType;
enum {
Flags = 0
@@ -275,11 +277,11 @@ class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, Input
/** \returns the nested expressions */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename internal::remove_all<typename InputXprType::Nested>::type&
const internal::remove_all_t<typename InputXprType::Nested>&
inputExpression() const { return m_input_xpr; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename internal::remove_all<typename KernelXprType::Nested>::type&
const internal::remove_all_t<typename KernelXprType::Nested>&
kernelExpression() const { return m_kernel_xpr; }
protected:
@@ -294,24 +296,24 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
{
typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
static const int NumKernelDims = internal::array_size<Indices>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
static constexpr int NumKernelDims = internal::array_size<Indices>::value;
typedef typename XprType::Index Index;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<InputArgType, Device>::Layout;
enum {
IsAligned = int(TensorEvaluator<InputArgType, Device>::IsAligned) & int(TensorEvaluator<KernelArgType, Device>::IsAligned),
PacketAccess = int(TensorEvaluator<InputArgType, Device>::PacketAccess) & int(TensorEvaluator<KernelArgType, Device>::PacketAccess),
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<InputArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -777,18 +779,18 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
{
typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
static const int NumKernelDims = internal::array_size<Indices>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
static constexpr int NumKernelDims = internal::array_size<Indices>::value;
typedef typename XprType::Index Index;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions;
static constexpr int Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout;
enum {
IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
PacketAccess = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -818,7 +820,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
typedef typename InputArgType::Scalar Scalar;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
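The evaluators above back TensorBase::convolve(). A minimal sketch of a 1-D convolution along one dimension of a 2-D tensor, evaluated on the default device (shapes are arbitrary):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> input(10, 4);
      Eigen::Tensor<float, 1> kernel(3);
      input.setRandom();
      kernel.setRandom();
      // Convolve along dimension 0; the output shrinks to 10 - 3 + 1 = 8 rows.
      Eigen::array<Eigen::Index, 1> dims = {{0}};
      Eigen::Tensor<float, 2> output = input.convolve(kernel, dims);
      return 0;
    }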

View File

@@ -15,6 +15,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorConvolution
@@ -275,9 +277,9 @@ template <typename Indices, typename InputArgType, typename KernelArgType>
struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Eigen::SyclDevice> {
typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
static const int NumDims =
static constexpr int NumDims =
internal::array_size<typename TensorEvaluator<InputArgType, Eigen::SyclDevice>::Dimensions>::value;
static const int NumKernelDims = internal::array_size<Indices>::value;
static constexpr int NumKernelDims = internal::array_size<Indices>::value;
typedef typename XprType::Index Index;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename TensorEvaluator<KernelArgType, Eigen::SyclDevice>::Dimensions KernelDimensions;
@@ -285,18 +287,18 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Eigen::SyclDevice>::type PacketReturnType;
typedef typename InputArgType::Scalar Scalar;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Eigen::SyclDevice> Storage;
typedef typename Storage::Type EvaluatorPointerType;
typedef StorageMemory<const CoeffReturnType, Eigen::SyclDevice> KernelStorage;
static constexpr int Layout = TensorEvaluator<InputArgType, Eigen::SyclDevice>::Layout;
enum {
IsAligned = TensorEvaluator<InputArgType, Eigen::SyclDevice>::IsAligned &
TensorEvaluator<KernelArgType, Eigen::SyclDevice>::IsAligned,
PacketAccess = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<InputArgType, Eigen::SyclDevice>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -392,8 +394,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
const size_t numX = dimensions()[m_indices[0]];
const size_t numP = dimensions().TotalSize() / numX;
const auto input_dim = std::array<size_t, 2>{numX, numP};
auto global_range = cl::sycl::range<2>{};
auto local_range = cl::sycl::range<2>{};
auto global_range = cl::sycl::range<2>{1, 1};
auto local_range = cl::sycl::range<2>{1, 1};
const size_t kernel_size = m_kernelImpl.dimensions().TotalSize();
m_device.parallel_for_setup(input_dim, global_range, local_range);
@@ -423,8 +425,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
const size_t numP = dimensions().TotalSize() / (numX * numY);
auto input_dim = std::array<size_t, 3>{numX, numY, numP};
auto global_range = cl::sycl::range<3>{};
auto local_range = cl::sycl::range<3>{};
auto global_range = cl::sycl::range<3>{1, 1, 1};
auto local_range = cl::sycl::range<3>{1, 1, 1};
m_device.parallel_for_setup(input_dim, global_range, local_range);
@@ -467,8 +469,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
auto global_range = cl::sycl::range<3>{};
auto local_range = cl::sycl::range<3>{};
auto global_range = cl::sycl::range<3>{1, 1, 1};
auto local_range = cl::sycl::range<3>{1, 1, 1};
m_device.parallel_for_setup(input_dim, global_range, local_range);
auto local_memory_range = (local_range + kernel_size - 1);
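The range changes above replace default-constructed cl::sycl::range objects with explicit {1, 1} / {1, 1, 1} placeholders, presumably because newer SYCL implementations no longer provide a default constructor for sycl::range; parallel_for_setup() then overwrites the values. A minimal sketch of the pattern, assuming a SYCL toolchain is available:

    #include <CL/sycl.hpp>

    int main() {
      // Start from explicit placeholder extents; sycl::range has no default
      // constructor in recent SYCL revisions.
      cl::sycl::range<2> global_range{1, 1};
      cl::sycl::range<2> local_range{1, 1};
      // ... a setup routine (parallel_for_setup in the patch above) would then
      // overwrite both with the real launch configuration.
      return 0;
    }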

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorEvaluator

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorCustomUnaryOp
@@ -27,9 +29,9 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
typedef typename XprType::StorageKind StorageKind;
typedef typename XprType::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = traits<XprType>::NumDimensions;
static const int Layout = traits<XprType>::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = traits<XprType>::NumDimensions;
static constexpr int Layout = traits<XprType>::Layout;
typedef typename traits<XprType>::PointerType PointerType;
};
@@ -67,7 +69,7 @@ class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFun
const CustomUnaryFunc& func() const { return m_func; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_expr; }
protected:
@@ -82,22 +84,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
{
typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
typedef typename internal::traits<ArgType>::Index Index;
static const int NumDims = internal::traits<ArgType>::NumDimensions;
static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename internal::remove_const<typename ArgType::Scalar>::type Scalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename ArgType::Scalar> Scalar;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<XprType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<XprType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -191,12 +193,12 @@ struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
typename traits<RhsXprType>::Index>::type Index;
typedef typename LhsXprType::Nested LhsNested;
typedef typename RhsXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
static const int NumDimensions = traits<LhsXprType>::NumDimensions;
static const int Layout = traits<LhsXprType>::Layout;
typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType>::type PointerType;
typedef std::remove_reference_t<LhsNested> LhsNested_;
typedef std::remove_reference_t<RhsNested> RhsNested_;
static constexpr int NumDimensions = traits<LhsXprType>::NumDimensions;
static constexpr int Layout = traits<LhsXprType>::Layout;
typedef std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType> PointerType;
};
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
@@ -234,11 +236,11 @@ class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinary
const CustomBinaryFunc& func() const { return m_func; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename LhsXprType::Nested>::type&
const internal::remove_all_t<typename LhsXprType::Nested>&
lhsExpression() const { return m_lhs_xpr; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
const internal::remove_all_t<typename RhsXprType::Nested>&
rhsExpression() const { return m_rhs_xpr; }
protected:
@@ -254,23 +256,23 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
{
typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
typedef typename internal::traits<XprType>::Index Index;
static const int NumDims = internal::traits<XprType>::NumDimensions;
static constexpr int NumDims = internal::traits<XprType>::NumDimensions;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<LhsXprType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
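The custom-op evaluators above are reached through TensorBase::customOp(). A hedged sketch of a unary custom functor, following the interface used in Eigen's custom-op tests (a dimensions() method plus a templated eval()); the functor and its behaviour here are purely illustrative:

    #include <unsupported/Eigen/CXX11/Tensor>

    // Toy functor for TensorCustomUnaryOp: same shape as the input, values doubled.
    struct ScaleByTwo {
      Eigen::DSizes<Eigen::DenseIndex, 2> dimensions(const Eigen::Tensor<float, 2>& input) const {
        return Eigen::DSizes<Eigen::DenseIndex, 2>(input.dimension(0), input.dimension(1));
      }
      template <typename Output, typename Device>
      void eval(const Eigen::Tensor<float, 2>& input, Output& output, const Device& device) const {
        // `output` is a TensorMap over the result buffer allocated by the evaluator.
        output.device(device) = input * 2.0f;
      }
    };

    int main() {
      Eigen::Tensor<float, 2> t(3, 4);
      t.setRandom();
      Eigen::Tensor<float, 2> doubled = t.customOp(ScaleByTwo());
      return 0;
    }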

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorDevice

View File

@@ -11,6 +11,8 @@
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
// Default device for the machine (typically a single cpu core)
@@ -39,6 +41,17 @@ struct DefaultDevice {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
::memset(buffer, c, n);
}
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
#ifdef EIGEN_GPU_COMPILE_PHASE
// std::fill is not a device function, so resort to a simple loop.
for (T* it = begin; it != end; ++it) {
*it = value;
}
#else
std::fill(begin, end, value);
#endif
}
template<typename Type>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
return data;
@@ -82,6 +95,10 @@ struct DefaultDevice {
return firstLevelCacheSize();
#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
// Nothing. Default device operations are synchronous.
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
#if !defined(EIGEN_GPU_COMPILE_PHASE)

View File

@@ -15,6 +15,8 @@
// A separate header (included at the end of this file) will undefine all
#include "TensorGpuHipCudaDefines.h"
#include "./InternalHeaderCheck.h"
namespace Eigen {
static const int kGpuScratchSize = 1024;
@@ -128,7 +130,13 @@ class GpuStreamDevice : public StreamInterface {
public:
// Use the default stream on the current device
GpuStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) {
gpuGetDevice(&device_);
gpuError_t status = gpuGetDevice(&device_);
if (status != gpuSuccess) {
std::cerr << "Failed to get the GPU devices "
<< gpuGetErrorString(status)
<< std::endl;
gpu_assert(status == gpuSuccess);
}
}
// Use the default stream on the specified device
GpuStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {}
@@ -139,7 +147,13 @@ class GpuStreamDevice : public StreamInterface {
GpuStreamDevice(const gpuStream_t* stream, int device = -1)
: stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) {
if (device < 0) {
gpuGetDevice(&device_);
gpuError_t status = gpuGetDevice(&device_);
if (status != gpuSuccess) {
std::cerr << "Failed to get the GPU devices "
<< gpuGetErrorString(status)
<< std::endl;
gpu_assert(status == gpuSuccess);
}
} else {
int num_devices;
gpuError_t err = gpuGetDeviceCount(&num_devices);
@@ -281,10 +295,49 @@ struct GpuDevice {
EIGEN_UNUSED_VARIABLE(err)
gpu_assert(err == gpuSuccess);
#else
EIGEN_UNUSED_VARIABLE(buffer)
EIGEN_UNUSED_VARIABLE(c)
EIGEN_UNUSED_VARIABLE(n)
eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
}
template<typename T>
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
#ifndef EIGEN_GPU_COMPILE_PHASE
const size_t count = end - begin;
// Split value into bytes and run memset with stride.
const int value_size = sizeof(value);
char* buffer = (char*)begin;
char* value_bytes = (char*)(&value);
gpuError_t err;
EIGEN_UNUSED_VARIABLE(err)
// If all value bytes are equal, then a single memset can be much faster.
bool use_single_memset = true;
for (int i=1; i<value_size; ++i) {
if (value_bytes[i] != value_bytes[0]) {
use_single_memset = false;
}
}
if (use_single_memset) {
err = gpuMemsetAsync(buffer, value_bytes[0], count * sizeof(T), stream_->stream());
gpu_assert(err == gpuSuccess);
} else {
for (int b=0; b<value_size; ++b) {
err = gpuMemset2DAsync(buffer+b, value_size, value_bytes[b], 1, count, stream_->stream());
gpu_assert(err == gpuSuccess);
}
}
#else
EIGEN_UNUSED_VARIABLE(begin)
EIGEN_UNUSED_VARIABLE(end)
EIGEN_UNUSED_VARIABLE(value)
eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
}
EIGEN_STRONG_INLINE size_t numThreads() const {
// FIXME
return 32;
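The GpuDevice::fill() added above issues a single gpuMemsetAsync when every byte of the value is identical, and otherwise one strided gpuMemset2DAsync per byte. A host-only sketch of the same all-bytes-equal test (the helper name is made up for illustration):

    #include <cstddef>
    #include <cstdio>
    #include <cstring>

    // Returns true when every byte of `value`'s object representation is equal,
    // i.e. the fill can be done with a single byte-wise memset.
    template <typename T>
    bool fillable_with_single_memset(const T& value) {
      unsigned char bytes[sizeof(T)];
      std::memcpy(bytes, &value, sizeof(T));
      for (std::size_t i = 1; i < sizeof(T); ++i) {
        if (bytes[i] != bytes[0]) return false;
      }
      return true;
    }

    int main() {
      std::printf("%d\n", fillable_with_single_memset(0.0f));  // 1: all bytes are 0x00
      std::printf("%d\n", fillable_with_single_memset(1.0f));  // 0: bytes of 0x3f800000 differ
      return 0;
    }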

View File

@@ -16,6 +16,8 @@
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
#include <unordered_set>
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace TensorSycl {
@@ -134,6 +136,15 @@ class QueueInterface {
this->exception_caught_ = this->sycl_async_handler(l);
},
num_threads) {}
explicit QueueInterface(
const cl::sycl::queue& q, unsigned num_threads = std::thread::hardware_concurrency())
: m_queue(q),
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
m_prog(m_queue.get_context(), get_sycl_supported_devices()),
#endif
m_thread_pool(num_threads),
m_device_info(m_queue) {}
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
EIGEN_STRONG_INLINE cl::sycl::program &program() const { return m_prog; }
@@ -244,7 +255,7 @@ class QueueInterface {
}
/// The memcpyHostToDevice is used to copy the data from host to device
/// The destination pointer could be deleted before the copy happend which is
/// The destination pointer could be deleted before the copy happened which is
/// why a callback function is needed. By default if none is provided, the
/// function is blocking.
EIGEN_STRONG_INLINE void memcpyHostToDevice(
@@ -272,7 +283,7 @@ class QueueInterface {
}
/// The memcpyDeviceToHost is used to copy the data from device to host.
/// The source pointer could be deleted before the copy happend which is
/// The source pointer could be deleted before the copy happened which is
/// why a callback function is needed. By default if none is provided, the
/// function is blocking.
EIGEN_STRONG_INLINE void memcpyDeviceToHost(
@@ -327,13 +338,27 @@ class QueueInterface {
if (n == 0) {
return;
}
n /= sizeof(buffer_scalar_t);
auto f = [&](cl::sycl::handler &cgh) {
auto dst_acc = get_range_accessor<write_mode>(cgh, data, n);
// The cast to uint8_t is here to match the behaviour of the standard
// memset. The cast to buffer_scalar_t is needed to match the type of the
// accessor (in case buffer_scalar_t is not uint8_t)
cgh.fill(dst_acc, static_cast<buffer_scalar_t>(static_cast<uint8_t>(c)));
// Get a typed range accessor to ensure we fill each byte, in case
// `buffer_scalar_t` is not (u)int8_t.
auto dst_acc = get_typed_range_accessor<write_mode, uint8_t>(cgh, data, n);
cgh.fill(dst_acc, static_cast<uint8_t>(c));
};
cl::sycl::event e;
EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f));
async_synchronize(e);
}
template<typename T>
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
static const auto write_mode = cl::sycl::access::mode::discard_write;
if (begin == end) {
return;
}
const ptrdiff_t count = end - begin;
auto f = [&](cl::sycl::handler &cgh) {
auto dst_acc = get_typed_range_accessor<write_mode, T>(cgh, begin, count);
cgh.fill(dst_acc, value);
};
cl::sycl::event e;
EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f));
@@ -359,15 +384,17 @@ class QueueInterface {
auto original_buffer = pMapper.get_buffer(ptr);
const ptrdiff_t offset = pMapper.get_offset(ptr);
eigen_assert(offset % sizeof(T) == 0 && "The offset must be a multiple of sizeof(T)");
eigen_assert(original_buffer.get_size() % sizeof(T) == 0 && "The buffer size must be a multiple of sizeof(T)");
const ptrdiff_t typed_offset = offset / sizeof(T);
eigen_assert(typed_offset >= 0);
const auto typed_size = original_buffer.get_size() / sizeof(T);
auto buffer = original_buffer.template reinterpret<
typename Eigen::internal::remove_const<T>::type>(
std::remove_const_t<T>>(
cl::sycl::range<1>(typed_size));
const ptrdiff_t size = buffer.get_count() - typed_offset;
eigen_assert(size >= 0);
typedef cl::sycl::accessor<typename Eigen::internal::remove_const<T>::type,
typedef cl::sycl::accessor<std::remove_const_t<T>,
1, AcMd, global_access, is_place_holder>
placeholder_accessor_t;
const auto start_ptr = static_cast<internal_ptr_t>(ptr) - offset;
@@ -395,6 +422,40 @@ class QueueInterface {
cgh, cl::sycl::range<1>(n_bytes), cl::sycl::id<1>(offset));
}
/// Get a range accessor to the virtual pointer's device memory with a
/// specified type and count.
template <cl::sycl::access::mode AcMd, typename T, typename Index>
EIGEN_STRONG_INLINE cl::sycl::accessor<
T, 1, AcMd, cl::sycl::access::target::global_buffer>
get_typed_range_accessor(cl::sycl::handler &cgh, const void *ptr,
const Index count) const {
static const auto global_access = cl::sycl::access::target::global_buffer;
eigen_assert(count >= 0);
std::lock_guard<std::mutex> lock(pmapper_mutex_);
auto buffer = pMapper.get_buffer(ptr);
const ptrdiff_t offset = pMapper.get_offset(ptr);
eigen_assert(offset >= 0);
// Technically we should create a subbuffer for the desired range,
// then reinterpret that. However, I was not able to get changes to reflect
// in the original buffer (only the subbuffer and reinterpreted buffer).
// This current implementation now has the restriction that the buffer
// offset and original buffer size must be a multiple of sizeof(T).
// Note that get_range_accessor(void*) currently has the same restriction.
//
// auto subbuffer = cl::sycl::buffer<buffer_scalar_t, 1>(buffer,
// cl::sycl::id<1>(offset), cl::sycl::range<1>(n_bytes));
eigen_assert(offset % sizeof(T) == 0 && "The offset must be a multiple of sizeof(T)");
eigen_assert(buffer.get_size() % sizeof(T) == 0 && "The buffer size must be a multiple of sizeof(T)");
const ptrdiff_t typed_offset = offset / sizeof(T);
const size_t typed_size = buffer.get_size() / sizeof(T);
auto reint = buffer.template reinterpret<
std::remove_const_t<T>>(
cl::sycl::range<1>(typed_size));
return reint.template get_access<AcMd, global_access>(
cgh, cl::sycl::range<1>(count), cl::sycl::id<1>(typed_offset));
}
/// Creation of sycl accessor for a buffer. This function first tries to find
/// the buffer in the buffer_map. If found it gets the accessor from it, if
/// not, the function then adds an entry by creating a sycl buffer for that
@@ -663,7 +724,7 @@ class QueueInterface {
EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }
EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
// OpenCL doesnot have such concept
// OpenCL does not have such a concept
return 2;
}
@@ -951,6 +1012,11 @@ struct SyclDevice : public SyclDeviceBase {
EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const {
queue_stream()->memset(data, c, n);
}
/// the fill function
template<typename T>
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
queue_stream()->fill(begin, end, value);
}
/// returning the sycl queue
EIGEN_STRONG_INLINE cl::sycl::queue &sycl_queue() const {
return queue_stream()->sycl_queue();
@@ -978,7 +1044,7 @@ struct SyclDevice : public SyclDeviceBase {
return queue_stream()->maxWorkItemSizes();
}
EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
// OpenCL doesnot have such concept
// OpenCL does not have such a concept
return queue_stream()->maxSyclThreadsPerMultiProcessor();
}
EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const {
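The new QueueInterface constructor above lets Eigen adopt an application-owned cl::sycl::queue instead of creating its own. A hedged sketch of wiring it into a SyclDevice, following the pattern used in Eigen's SYCL tests (the selector and queue here are illustrative):

    #define EIGEN_USE_SYCL
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      // An application-owned queue, e.g. one shared with non-Eigen kernels.
      cl::sycl::queue app_queue{cl::sycl::default_selector{}};
      // The new constructor adopts the existing queue instead of creating one.
      Eigen::QueueInterface queue_interface(app_queue);
      Eigen::SyclDevice device(&queue_interface);
      // `device` can now be handed to tensor expressions via .device(device).
      return 0;
    }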

View File

@@ -10,6 +10,8 @@
#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H)
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
// Runs an arbitrary function and then calls Notify() on the passed in
@@ -122,6 +124,11 @@ struct ThreadPoolDevice {
::memset(buffer, c, n);
}
template<typename T>
EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
std::fill(begin, end, value);
}
EIGEN_STRONG_INLINE int numThreads() const {
return num_threads_;
}
@@ -140,6 +147,10 @@ struct ThreadPoolDevice {
// The l3 cache size is shared between all the cores.
return l3CacheSize() / num_threads_;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
// Nothing. Threadpool device operations are synchronous.
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
// Should return an enum that encodes the ISA supported by the CPU
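ThreadPoolDevice gains the same fill()/synchronize() hooks as the other devices. A minimal sketch of the usual thread-pool setup and a tensor expression evaluated on it (thread count and shapes are arbitrary):

    #define EIGEN_USE_THREADS
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::ThreadPool pool(4);                 // 4 worker threads
      Eigen::ThreadPoolDevice device(&pool, 4);

      Eigen::Tensor<float, 2> a(256, 256), b(256, 256), c(256, 256);
      a.setRandom();
      b.setRandom();
      // Evaluate the expression on the thread pool; the new fill()/synchronize()
      // members above are available to evaluators running on this device.
      c.device(device) = a + b;
      return 0;
    }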

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \internal
@@ -43,8 +45,6 @@ template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(c
return n;
}
#if EIGEN_HAS_CONSTEXPR
template <typename Index, std::size_t Rank>
struct index_known_statically_impl<DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) {
@@ -136,99 +136,6 @@ struct index_statically_lt_impl<const DimensionList<Index, Rank> > {
}
};
#else
template <typename Index, std::size_t Rank>
struct index_known_statically_impl<DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) {
return true;
}
};
template <typename Index, std::size_t Rank>
struct index_known_statically_impl<const DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) {
return true;
}
};
template <typename Index, std::size_t Rank>
struct all_indices_known_statically_impl<DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};
template <typename Index, std::size_t Rank>
struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};
template <typename Index, std::size_t Rank>
struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};
template <typename Index, std::size_t Rank>
struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
return true;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_eq_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_eq_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_ne_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_ne_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_gt_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_gt_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_lt_impl<DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
template <typename Index, std::size_t Rank>
struct index_statically_lt_impl<const DimensionList<Index, Rank> > {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
return false;
}
};
#endif
} // end namespace internal
} // end namespace Eigen

View File

@@ -11,6 +11,8 @@
#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \internal
@@ -109,12 +111,10 @@ struct Sizes {
explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
// todo: add assertion
}
#endif
template <typename T> Sizes& operator = (const T& /*other*/) {
// add assertion failure if the size of other is different
@@ -171,28 +171,16 @@ template <std::ptrdiff_t V1=0, std::ptrdiff_t V2=0, std::ptrdiff_t V3=0, std::pt
explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}
template <typename T> Sizes& operator = (const T& /*other*/) {
// add assertion failure if the size of other is different
return *this;
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
explicit Sizes(std::initializer_list<std::ptrdiff_t>) {
// todo: add assertion
}
#else
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[] (const Index index) const {
switch (index) {
@@ -296,20 +284,19 @@ struct DSizes : array<DenseIndex, NumDims> {
EIGEN_DEVICE_FUNC
explicit DSizes(const array<OtherIndex, NumDims>& other,
// Default template parameters require c++11.
typename internal::enable_if<
std::enable_if_t<
internal::is_same<
DenseIndex,
typename internal::promote_index_type<
DenseIndex,
OtherIndex
>::type
>::value, void*>::type = 0) {
>::value, void*> = 0) {
for (int i = 0; i < NumDims; ++i) {
(*this)[i] = static_cast<DenseIndex>(other[i]);
}
}
#ifdef EIGEN_HAS_INDEX_LIST
template <typename FirstType, typename... OtherTypes>
EIGEN_DEVICE_FUNC
explicit DSizes(const Eigen::IndexList<FirstType, OtherTypes...>& dimensions) {
@@ -317,7 +304,6 @@ struct DSizes : array<DenseIndex, NumDims> {
(*this)[i] = dimensions[i];
}
}
#endif
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::ptrdiff_t... Indices>
@@ -335,39 +321,10 @@ struct DSizes : array<DenseIndex, NumDims> {
}
#endif
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) {
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
#else
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) {
eigen_assert(NumDims == 2);
(*this)[0] = i0;
(*this)[1] = i1;
}
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
eigen_assert(NumDims == 3);
(*this)[0] = i0;
(*this)[1] = i1;
(*this)[2] = i2;
}
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
eigen_assert(NumDims == 4);
(*this)[0] = i0;
(*this)[1] = i1;
(*this)[2] = i2;
(*this)[3] = i3;
}
EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
eigen_assert(NumDims == 5);
(*this)[0] = i0;
(*this)[1] = i1;
(*this)[2] = i2;
(*this)[3] = i3;
(*this)[4] = i4;
}
#endif
EIGEN_DEVICE_FUNC DSizes& operator = (const array<DenseIndex, NumDims>& other) {
*static_cast<Base*>(this) = other;
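With the pre-C++11 fixed-arity constructors removed above, DSizes is built through the single variadic constructor, which statically checks the argument count against NumDims. A minimal sketch:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      // One variadic constructor now covers every rank; the static assert rejects
      // an argument count that does not match NumDims at compile time.
      Eigen::DSizes<Eigen::DenseIndex, 3> dims(4, 5, 6);
      std::cout << dims.TotalSize() << "\n";  // 120
      return 0;
    }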

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorForcedEval
@@ -29,9 +31,9 @@ struct traits<TensorEvalToOp<XprType, MakePointer_> >
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename MakePointer_<Scalar>::Type PointerType;
enum {
@@ -70,19 +72,19 @@ class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>,
public:
typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename MakePointer_<CoeffReturnType>::Type PointerType;
typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested;
typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index;
static const int NumDims = Eigen::internal::traits<TensorEvalToOp>::NumDimensions;
static constexpr int NumDims = Eigen::internal::traits<TensorEvalToOp>::NumDimensions;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr)
: m_xpr(expr), m_buffer(buffer) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; }
@@ -101,9 +103,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
@@ -112,12 +114,12 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = true,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = true
};
static const int NumDims = internal::traits<ArgType>::NumDimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorEvaluator
@@ -33,26 +35,26 @@ struct TensorEvaluator
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename Derived::Dimensions Dimensions;
typedef Derived XprType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static constexpr int Layout = Derived::Layout;
enum {
IsAligned = Derived::IsAligned,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
BlockAccess = internal::is_arithmetic<std::remove_const_t<Scalar>>::value,
PreferBlockAccess = false,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
RawAccess = true
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
@@ -73,7 +75,7 @@ struct TensorEvaluator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && dest) {
m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
}
@@ -113,7 +115,7 @@ struct TensorEvaluator
// float element will be loaded, otherwise 0 will be loaded.
// Function has been templatized to enable Sfinae.
template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
{
return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
@@ -157,14 +159,14 @@ struct TensorEvaluator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_data != NULL);
eigen_assert(m_data != NULL);
return TensorBlock::materialize(m_data, m_dims, desc, scratch);
}
template<typename TensorBlock>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
const TensorBlockDesc& desc, const TensorBlock& block) {
assert(m_data != NULL);
eigen_assert(m_data != NULL);
typedef typename TensorBlock::XprType TensorBlockExpr;
typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
@@ -192,7 +194,7 @@ struct TensorEvaluator
const Device EIGEN_DEVICE_REF m_device;
};
namespace {
namespace internal {
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
return *address;
@@ -219,8 +221,7 @@ T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address
return *address;
}
#endif
}
} // namespace internal
// Default evaluator for rvalues
template<typename Derived, typename Device>
@@ -236,19 +237,19 @@ struct TensorEvaluator<const Derived, Device>
typedef StorageMemory<const Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int Layout = Derived::Layout;
enum {
IsAligned = Derived::IsAligned,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
PreferBlockAccess = false,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
RawAccess = true
};
@@ -269,7 +270,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && data) {
m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
}
@@ -289,7 +290,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data != NULL);
return loadConstant(m_data+index);
return internal::loadConstant(m_data+index);
}
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -304,7 +305,7 @@ struct TensorEvaluator<const Derived, Device>
// float element will be loaded, otherwise 0 will be loaded.
// Function has been templatized to enable Sfinae.
template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
{
return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
@@ -314,7 +315,7 @@ struct TensorEvaluator<const Derived, Device>
eigen_assert(m_data != NULL);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
: m_dims.IndexOfRowMajor(coords);
return loadConstant(m_data+index);
return internal::loadConstant(m_data+index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
@@ -330,7 +331,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_data != NULL);
eigen_assert(m_data != NULL);
return TensorBlock::materialize(m_data, m_dims, desc, scratch);
}
@@ -365,11 +366,12 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = true,
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
@@ -379,7 +381,6 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -443,13 +444,13 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
int(internal::functor_traits<UnaryOp>::PacketAccess),
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -462,14 +463,14 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int NumDims = internal::array_size<Dimensions>::value;
static constexpr int NumDims = internal::array_size<Dimensions>::value;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -555,6 +556,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
{
typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
enum {
IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
int(TensorEvaluator<RightArgType, Device>::IsAligned),
@@ -565,7 +567,6 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
int(TensorEvaluator<RightArgType, Device>::BlockAccess),
PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -584,12 +585,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int NumDims = internal::array_size<
static constexpr int NumDims = internal::array_size<
typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
@@ -693,6 +694,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
{
typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;
static constexpr int Layout = TensorEvaluator<Arg1Type, Device>::Layout;
enum {
IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
@@ -703,7 +705,6 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
Layout = TensorEvaluator<Arg1Type, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -736,7 +737,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
@@ -811,6 +812,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
typedef typename XprType::Scalar Scalar;
static constexpr int Layout = TensorEvaluator<IfArgType, Device>::Layout;
enum {
IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
TensorEvaluator<ElseArgType, Device>::IsAligned,
@@ -823,7 +825,6 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<IfArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -842,12 +843,12 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef typename XprType::Index Index;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int NumDims = internal::array_size<Dimensions>::value;
static constexpr int NumDims = internal::array_size<Dimensions>::value;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/**
@@ -165,12 +167,12 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
/*Tiling=*/TiledEvaluation::On> {
public:
typedef typename traits<Expression>::Scalar Scalar;
typedef typename remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
typedef TensorEvaluator<Expression, DefaultDevice> Evaluator;
typedef typename traits<Expression>::Index StorageIndex;
static const int NumDims = traits<Expression>::NumDimensions;
static constexpr int NumDims = traits<Expression>::NumDimensions;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(const Expression& expr,
@@ -282,7 +284,7 @@ struct EvalRange {
template <typename Evaluator, typename StorageIndex>
struct EvalRange<Evaluator, StorageIndex, /*Vectorizable*/ true> {
static const int PacketSize =
static constexpr int PacketSize =
unpacket_traits<typename Evaluator::PacketReturnType>::size;
static void run(Evaluator* evaluator_in, const StorageIndex firstIdx,
@@ -351,9 +353,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
public:
typedef typename traits<Expression>::Index IndexType;
typedef typename traits<Expression>::Scalar Scalar;
typedef typename remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
static const int NumDims = traits<Expression>::NumDimensions;
static constexpr int NumDims = traits<Expression>::NumDimensions;
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
typedef TensorBlockMapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
@@ -459,9 +461,9 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
public:
typedef typename traits<Expression>::Index IndexType;
typedef typename traits<Expression>::Scalar Scalar;
typedef typename remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
static const int NumDims = traits<Expression>::NumDimensions;
static constexpr int NumDims = traits<Expression>::NumDimensions;
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
typedef TensorBlockMapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
@@ -551,11 +553,59 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable, Tiling> {
};
#if defined(EIGEN_GPUCC)
// Returns 1 if lhs + rhs would overflow, -1 if it would underflow, otherwise 0.
template <typename Index>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int sum_will_overflow(Index lhs,
Index rhs) {
const Index highest = NumTraits<Index>::highest();
const Index lowest = NumTraits<Index>::lowest();
if (lhs > 0 && rhs > 0) {
return lhs > highest - rhs ? 1 : 0;
} else if (lhs < 0 && rhs < 0) {
return lhs < lowest - rhs ? -1 : 0;
} else {
return 0;
}
}
// Returns lhs + rhs, saturating to the highest/lowest representable value on
// overflow/underflow respectively.
template <typename Index>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index saturate_add(Index lhs, Index rhs) {
const Index highest = NumTraits<Index>::highest();
const Index lowest = NumTraits<Index>::lowest();
int overflow = sum_will_overflow(lhs, rhs);
return overflow == 1 ? highest : overflow == -1 ? lowest : lhs + rhs;
}
// A functor that adds step_size to a given index, saturating to avoid
// overflow/underflow. If overflow/underflow is not possible, regular addition
// is used (for efficiency).
template <typename Index>
struct SafeStep {
// lastIdx is one past the end of the possible indexes.
// step_size is the value that will be added to the given index when the
// functor is called.
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SafeStep(Index lastIdx, Index step_size)
: can_overflow_(sum_will_overflow(lastIdx, step_size)),
step_size_(step_size) {}
// Adds step_size to index, saturating on overflow (if overflow is possible).
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index operator()(Index index) const {
return can_overflow_ ? saturate_add(index, step_size_) : index + step_size_;
}
private:
const bool can_overflow_;
const Index step_size_;
};
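As a minimal standalone sketch, the saturation logic above can be exercised with a plain int index; this is an illustrative re-implementation under that assumption, not the Eigen internals themselves:

// Sketch of the saturating add shown above, using plain int instead of the
// templated Index type.
#include <cassert>
#include <limits>

int sum_will_overflow(int lhs, int rhs) {
  const int highest = std::numeric_limits<int>::max();
  const int lowest = std::numeric_limits<int>::min();
  if (lhs > 0 && rhs > 0) return lhs > highest - rhs ? 1 : 0;
  if (lhs < 0 && rhs < 0) return lhs < lowest - rhs ? -1 : 0;
  return 0;
}

int saturate_add(int lhs, int rhs) {
  const int overflow = sum_will_overflow(lhs, rhs);
  return overflow == 1 ? std::numeric_limits<int>::max()
       : overflow == -1 ? std::numeric_limits<int>::min()
                        : lhs + rhs;
}

int main() {
  const int last_idx = std::numeric_limits<int>::max() - 2;
  const int step = 100;
  // Stepping past last_idx clamps to INT_MAX instead of wrapping negative,
  // so an "i < last_idx" loop condition still terminates.
  assert(saturate_add(last_idx, step) == std::numeric_limits<int>::max());
  assert(saturate_add(10, 20) == 30);  // no overflow: ordinary addition
  return 0;
}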
template <typename Evaluator, typename StorageIndex, bool Vectorizable>
struct EigenMetaKernelEval {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void run(Evaluator& eval, StorageIndex firstIdx, StorageIndex lastIdx, StorageIndex step_size) {
for (StorageIndex i = firstIdx; i < lastIdx; i += step_size) {
SafeStep<StorageIndex> safe_step(lastIdx, step_size);
for (StorageIndex i = firstIdx; i < lastIdx; i = safe_step(i)) {
eval.evalScalar(i);
}
}
@@ -569,12 +619,16 @@ struct EigenMetaKernelEval<Evaluator, StorageIndex, true> {
const StorageIndex vectorized_size = (lastIdx / PacketSize) * PacketSize;
const StorageIndex vectorized_step_size = step_size * PacketSize;
SafeStep<StorageIndex> safe_vectorized_step(vectorized_size,
vectorized_step_size);
// Use the vector path
for (StorageIndex i = firstIdx * PacketSize; i < vectorized_size;
i += vectorized_step_size) {
i = safe_vectorized_step(i)) {
eval.evalPacket(i);
}
for (StorageIndex i = vectorized_size + firstIdx; i < lastIdx; i += step_size) {
SafeStep<StorageIndex> safe_step(lastIdx, step_size);
for (StorageIndex i = saturate_add(vectorized_size, firstIdx); i < lastIdx;
i = safe_step(i)) {
eval.evalScalar(i);
}
}
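The split between the packet path and the scalar tail reduces to simple integer arithmetic; the numbers below (PacketSize 4, lastIdx 103) are hypothetical and only illustrate the bookkeeping:

// With PacketSize == 4 and lastIdx == 103, packets cover [0, 100) and the
// remaining 3 coefficients are evaluated one by one.
#include <cassert>

int main() {
  const long PacketSize = 4;
  const long lastIdx = 103;
  const long vectorized_size = (lastIdx / PacketSize) * PacketSize;
  assert(vectorized_size == 100);          // range handled by evalPacket
  assert(lastIdx - vectorized_size == 3);  // scalar tail handled by evalScalar
  return 0;
}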
@@ -601,8 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
if (needs_assign) {
const int block_size = device.maxGpuThreadsPerBlock();
const int max_blocks = device.getNumGpuMultiProcessors() *
device.maxGpuThreadsPerMultiProcessor() / block_size;
const int max_blocks =
numext::mini<int64_t>(device.getNumGpuMultiProcessors() *
device.maxGpuThreadsPerMultiProcessor(),
NumTraits<StorageIndex>::highest()) /
block_size;
const StorageIndex size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash when TensorFlow calls with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1);
@@ -635,7 +692,7 @@ struct ExecExprFunctorKernel {
compute(itemID);
}
template <bool is_vec = Evaluator::PacketAccess>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<!is_vec>::type
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t<!is_vec>
compute(const cl::sycl::nd_item<1>& itemID) {
Index gId = static_cast<Index>(itemID.get_global_linear_id());
Index total_threads = itemID.get_global_range(0);
@@ -645,7 +702,7 @@ struct ExecExprFunctorKernel {
}
}
template <bool is_vec = Evaluator::PacketAccess>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<is_vec>::type
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t<is_vec>
compute(const cl::sycl::nd_item<1>& itemID) {
const Index vectorizedRange =
(range / Evaluator::PacketSize) * Evaluator::PacketSize;


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorExpr
@@ -35,9 +37,9 @@ struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> >
typedef traits<XprType> XprTraits;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<XprTypeNested> XprTypeNested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
enum {
Flags = 0
@@ -63,7 +65,7 @@ class TensorCwiseNullaryOp : public TensorBase<TensorCwiseNullaryOp<NullaryOp, X
: m_xpr(xpr), m_functor(func) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
nestedExpression() const { return m_xpr; }
EIGEN_DEVICE_FUNC
@@ -86,9 +88,9 @@ struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar;
typedef traits<XprType> XprTraits;
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<XprTypeNested> XprTypeNested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar,
typename XprTraits::PointerType
>::type
@@ -132,7 +134,7 @@ class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
nestedExpression() const { return m_xpr; }
protected:
@@ -161,14 +163,14 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
typename traits<RhsXprType>::Index>::type Index;
typedef typename LhsXprType::Nested LhsNested;
typedef typename RhsXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<LhsNested> LhsNested_;
typedef std::remove_reference_t<RhsNested> RhsNested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar,
typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType,
typename traits<RhsXprType>::PointerType>::type
typename traits<RhsXprType>::PointerType>
>::type
PointerType;
enum {
@@ -213,11 +215,11 @@ class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsX
/** \returns the nested expressions */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename LhsXprType::Nested>::type&
const internal::remove_all_t<typename LhsXprType::Nested>&
lhsExpression() const { return m_lhs_xpr; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
const internal::remove_all_t<typename RhsXprType::Nested>&
rhsExpression() const { return m_rhs_xpr; }
protected:
@@ -242,15 +244,15 @@ struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprT
typedef typename Arg1XprType::Nested Arg1Nested;
typedef typename Arg2XprType::Nested Arg2Nested;
typedef typename Arg3XprType::Nested Arg3Nested;
typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Arg1Nested> Arg1Nested_;
typedef std::remove_reference_t<Arg2Nested> Arg2Nested_;
typedef std::remove_reference_t<Arg3Nested> Arg3Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar,
typename conditional<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val,
std::conditional_t<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val,
typename traits<Arg2XprType>::PointerType,
typename traits<Arg3XprType>::PointerType>::type
typename traits<Arg3XprType>::PointerType>
>::type
PointerType;
enum {
@@ -293,15 +295,15 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A
/** \returns the nested expressions */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename Arg1XprType::Nested>::type&
const internal::remove_all_t<typename Arg1XprType::Nested>&
arg1Expression() const { return m_arg1_xpr; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename Arg2XprType::Nested>::type&
const internal::remove_all_t<typename Arg2XprType::Nested>&
arg2Expression() const { return m_arg2_xpr; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename Arg3XprType::Nested>::type&
const internal::remove_all_t<typename Arg3XprType::Nested>&
arg3Expression() const { return m_arg3_xpr; }
protected:
@@ -326,11 +328,11 @@ struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >
typedef typename IfXprType::Nested IfNested;
typedef typename ThenXprType::Nested ThenNested;
typedef typename ElseXprType::Nested ElseNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef typename conditional<Pointer_type_promotion<typename ThenXprType::Scalar, Scalar>::val,
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef std::conditional_t<Pointer_type_promotion<typename ThenXprType::Scalar, Scalar>::val,
typename traits<ThenXprType>::PointerType,
typename traits<ElseXprType>::PointerType>::type PointerType;
typename traits<ElseXprType>::PointerType> PointerType;
};
template<typename IfXprType, typename ThenXprType, typename ElseXprType>


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorFFT
@@ -60,13 +62,13 @@ struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits
typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
typedef typename std::complex<RealScalar> ComplexScalar;
typedef typename XprTraits::Scalar InputScalar;
typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename traits<XprType>::PointerType PointerType;
};
@@ -88,7 +90,7 @@ class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, F
typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename std::complex<RealScalar> ComplexScalar;
typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
typedef OutputScalar CoeffReturnType;
typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
@@ -101,7 +103,7 @@ class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, F
const FFT& fft() const { return m_fft; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type& expression() const {
const internal::remove_all_t<typename XprType::Nested>& expression() const {
return m_xpr;
}
@@ -115,7 +117,7 @@ template <typename FFT, typename ArgType, typename Device, int FFTResultType, in
struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
@@ -123,19 +125,19 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
typedef internal::traits<XprType> XprTraits;
typedef typename XprTraits::Scalar InputScalar;
typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
typedef OutputScalar CoeffReturnType;
typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = true,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorFixedSize
@@ -36,14 +38,14 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename Base::CoeffReturnType CoeffReturnType;
static const int Options = Options_;
static constexpr int Options = Options_;
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0),
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
BlockAccess = false,
PreferBlockAccess = false,
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
CoordAccess = true,
RawAccess = true
};
@@ -53,7 +55,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
//===--------------------------------------------------------------------===//
typedef Dimensions_ Dimensions;
static const std::size_t NumIndices = Dimensions::count;
static constexpr std::size_t NumIndices = Dimensions::count;
protected:
TensorStorage<Scalar, Dimensions, Options> m_storage;
@@ -61,7 +63,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions dimensions() const { return m_storage.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); }
@@ -72,7 +74,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
inline Self& base() { return *this; }
inline const Self& base() const { return *this; }
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const
{
@@ -80,7 +81,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}});
}
#endif
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
@@ -104,7 +104,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices)
{
@@ -112,7 +111,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}});
}
#endif
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
@@ -135,7 +133,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
return m_storage.data()[0];
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
{
@@ -143,53 +140,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
{
if (Options&RowMajor) {
const Index index = i1 + i0 * m_storage.dimensions()[1];
return m_storage.data()[index];
} else {
const Index index = i0 + i1 * m_storage.dimensions()[0];
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
{
if (Options&RowMajor) {
const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0);
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
{
if (Options&RowMajor) {
const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0));
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
{
if (Options&RowMajor) {
const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)));
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
return m_storage.data()[index];
}
}
#endif
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
@@ -220,7 +170,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
return coeff(index);
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
{
@@ -228,52 +177,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
{
if (Options&RowMajor) {
const Index index = i1 + i0 * m_storage.dimensions()[1];
return m_storage.data()[index];
} else {
const Index index = i0 + i1 * m_storage.dimensions()[0];
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
{
if (Options&RowMajor) {
const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0);
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
{
if (Options&RowMajor) {
const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0));
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
return m_storage.data()[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
{
if (Options&RowMajor) {
const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)));
return m_storage.data()[index];
} else {
const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
return m_storage.data()[index];
}
}
#endif
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
@@ -312,16 +215,14 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorFixedSize(const Self& other)
: m_storage(other.m_storage)
: Base(other), m_storage(other.m_storage)
{
}
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize(Self&& other)
: m_storage(other.m_storage)
{
}
#endif
template<typename OtherDerived>
EIGEN_DEVICE_FUNC


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorForcedEval
@@ -29,9 +31,9 @@ struct traits<TensorForcedEvalOp<XprType> >
typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename traits<XprType>::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
enum {
@@ -61,7 +63,7 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
public:
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested;
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index;
@@ -70,7 +72,7 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
: m_xpr(expr) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -103,14 +105,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) {
template<typename ArgType_, typename Device>
struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
{
typedef const typename internal::remove_all<ArgType_>::type ArgType;
typedef const internal::remove_all_t<ArgType_> ArgType;
typedef TensorForcedEvalOp<ArgType> XprType;
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
@@ -120,11 +122,11 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = internal::is_arithmetic<CoeffReturnType>::value,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = true
};
static const int NumDims = internal::traits<ArgType>::NumDimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -148,11 +150,11 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
internal::non_integral_type_placement_new<Device, CoeffReturnType>()(numValues, m_buffer);
typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
typedef TensorEvalToOp< const std::remove_const_t<ArgType> > EvalTo;
EvalTo evalToTmp(m_device.get(m_buffer), m_op);
internal::TensorExecutor<
const EvalTo, typename internal::remove_const<Device>::type,
const EvalTo, std::remove_const_t<Device>,
/*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
/*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
run(evalToTmp, m_device);
@@ -167,14 +169,14 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
const Index numValues = internal::array_prod(m_impl.dimensions());
m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
numValues * sizeof(CoeffReturnType)));
typedef TensorEvalToOp<const typename internal::remove_const<ArgType>::type>
typedef TensorEvalToOp<const std::remove_const_t<ArgType>>
EvalTo;
EvalTo evalToTmp(m_device.get(m_buffer), m_op);
auto on_done = std::bind([](EvalSubExprsCallback done_) { done_(true); },
std::move(done));
internal::TensorAsyncExecutor<
const EvalTo, typename internal::remove_const<Device>::type,
const EvalTo, std::remove_const_t<Device>,
decltype(on_done),
/*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
/*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
@@ -206,7 +208,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_buffer != NULL);
eigen_assert(m_buffer != NULL);
return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
}


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
// MakePointer class is used as a container of the address space of the pointer
@@ -29,7 +31,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* constCast(const T* data) {
}
// The StorageMemory class is a container of the device specific pointer
// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression
// used for referring to a Pointer on TensorEvaluator class. While the TensorExpression
// is a device-agnostic type and needs the MakePointer class for type conversion,
// the TensorEvaluator class can be specialized for a device, hence it is possible
// to construct different types of temporary storage memory in TensorEvaluator
@@ -61,8 +63,8 @@ template<typename BinaryOp, typename LeftXprType, typename RightXprType> class T
template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> class TensorCwiseTernaryOp;
template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp;
template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_ = MakePointer > class TensorReductionOp;
template<typename XprType> class TensorIndexTupleOp;
template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp;
template<typename XprType> class TensorIndexPairOp;
template<typename ReduceOp, typename Dims, typename XprType> class TensorPairReducerOp;
template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
template<typename Dimensions, typename LeftXprType, typename RightXprType, typename OutputKernelType> class TensorContractionOp;
template<typename TargetType, typename XprType> class TensorConversionOp;
@@ -165,7 +167,7 @@ struct IsTileable {
// Check that block evaluation is supported and it's a preferred option (at
// least one sub-expression has much faster block evaluation, e.g.
// broadcasting).
static const bool BlockAccess =
static constexpr bool BlockAccess =
TensorEvaluator<Expression, Device>::BlockAccess &&
TensorEvaluator<Expression, Device>::PreferBlockAccess;


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -33,7 +35,6 @@ struct functor_traits<scalar_mod_op<Scalar> >
*/
template <typename Scalar>
struct scalar_mod2_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
};
template <typename Scalar>
@@ -42,7 +43,6 @@ struct functor_traits<scalar_mod2_op<Scalar> >
template <typename Scalar>
struct scalar_fmod_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar
operator()(const Scalar& a, const Scalar& b) const {
return numext::fmod(a, b);
@@ -367,7 +367,7 @@ struct reducer_traits<OrReducer, Device> {
// Argmin/Argmax reducers. Returns the first occurrence if multiple locations
// contain the same min/max value.
template <typename T> struct ArgMaxTupleReducer
template <typename T> struct ArgMaxPairReducer
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
if (t.second < accum->second) {
@@ -385,7 +385,7 @@ template <typename T> struct ArgMaxTupleReducer
};
template <typename T, typename Device>
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
struct reducer_traits<ArgMaxPairReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = false,
@@ -395,7 +395,7 @@ struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
};
template <typename T> struct ArgMinTupleReducer
template <typename T> struct ArgMinPairReducer
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
if (t.second > accum->second) {
@@ -413,7 +413,7 @@ template <typename T> struct ArgMinTupleReducer
};
template <typename T, typename Device>
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
struct reducer_traits<ArgMinPairReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = false,
@@ -426,7 +426,7 @@ struct reducer_traits<ArgMinTupleReducer<T>, Device> {
template <typename T, typename Index, size_t NumDims>
class GaussianGenerator {
public:
static const bool PacketAccess = false;
static constexpr bool PacketAccess = false;
EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
const array<T, NumDims>& std_devs)


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorGeneratorOp
@@ -28,9 +30,9 @@ struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -68,7 +70,7 @@ class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType
const Generator& generator() const { return m_generator; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -84,18 +86,18 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
typedef TensorGeneratorOp<Generator, ArgType> XprType;
typedef typename XprType::Index Index;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
static const int NumDims = internal::array_size<Dimensions>::value;
static constexpr int NumDims = internal::array_size<Dimensions>::value;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = true,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -153,10 +155,9 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
for (int i = 0; i < packetSize; ++i) {
values[i] = coeff(index+i);
}


@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors.


@@ -41,6 +41,7 @@
#define gpuMalloc hipMalloc
#define gpuFree hipFree
#define gpuMemsetAsync hipMemsetAsync
#define gpuMemset2DAsync hipMemset2DAsync
#define gpuMemcpyAsync hipMemcpyAsync
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
@@ -71,6 +72,7 @@
#define gpuMalloc cudaMalloc
#define gpuFree cudaFree
#define gpuMemsetAsync cudaMemsetAsync
#define gpuMemset2DAsync cudaMemset2DAsync
#define gpuMemcpyAsync cudaMemcpyAsync
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
@@ -91,7 +93,7 @@
// HIPCC does not support the use of assert on the GPU side.
#define gpu_assert(COND)
#else
#define gpu_assert(COND) assert(COND)
#define gpu_assert(COND) eigen_assert(COND)
#endif
#endif // gpu_assert


@@ -26,6 +26,7 @@
#undef gpuMalloc
#undef gpuFree
#undef gpuMemsetAsync
#undef gpuMemset2DAsync
#undef gpuMemcpyAsync
#undef gpuMemcpyDeviceToDevice
#undef gpuMemcpyDeviceToHost


@@ -10,70 +10,365 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H
#define EIGEN_CXX11_TENSOR_TENSOR_IO_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
struct TensorIOFormat;
namespace internal {
// Print the tensor as a 2d matrix
template <typename Tensor, int Rank>
struct TensorPrinter {
static void run (std::ostream& os, const Tensor& tensor) {
typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
typedef typename Tensor::Index Index;
const Index total_size = internal::array_prod(tensor.dimensions());
if (total_size > 0) {
const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
static const int layout = Tensor::Layout;
Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
os << matrix;
}
}
};
// Print the tensor as a vector
template <typename Tensor>
struct TensorPrinter<Tensor, 1> {
static void run (std::ostream& os, const Tensor& tensor) {
typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
typedef typename Tensor::Index Index;
const Index total_size = internal::array_prod(tensor.dimensions());
if (total_size > 0) {
Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
os << array;
}
}
};
// Print the tensor as a scalar
template <typename Tensor>
struct TensorPrinter<Tensor, 0> {
static void run (std::ostream& os, const Tensor& tensor) {
os << tensor.coeff(0);
}
};
template <typename Tensor, std::size_t rank>
struct TensorPrinter;
}
struct TensorIOFormat {
TensorIOFormat(const std::vector<std::string>& _separator, const std::vector<std::string>& _prefix,
const std::vector<std::string>& _suffix, int _precision = StreamPrecision, int _flags = 0,
const std::string& _tenPrefix = "", const std::string& _tenSuffix = "", const char _fill = ' ')
: tenPrefix(_tenPrefix),
tenSuffix(_tenSuffix),
prefix(_prefix),
suffix(_suffix),
separator(_separator),
fill(_fill),
precision(_precision),
flags(_flags) {
init_spacer();
}
TensorIOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _tenPrefix = "",
const std::string& _tenSuffix = "", const char _fill = ' ')
: tenPrefix(_tenPrefix), tenSuffix(_tenSuffix), fill(_fill), precision(_precision), flags(_flags) {
// default values of prefix, suffix and separator
prefix = {"", "["};
suffix = {"", "]"};
separator = {", ", "\n"};
init_spacer();
}
void init_spacer() {
if ((flags & DontAlignCols)) return;
spacer.resize(prefix.size());
spacer[0] = "";
int i = int(tenPrefix.length()) - 1;
while (i >= 0 && tenPrefix[i] != '\n') {
spacer[0] += ' ';
i--;
}
for (std::size_t k = 1; k < prefix.size(); k++) {
int j = int(prefix[k].length()) - 1;
while (j >= 0 && prefix[k][j] != '\n') {
spacer[k] += ' ';
j--;
}
}
}
static inline const TensorIOFormat Numpy() {
std::vector<std::string> prefix = {"", "["};
std::vector<std::string> suffix = {"", "]"};
std::vector<std::string> separator = {" ", "\n"};
return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "[", "]");
}
static inline const TensorIOFormat Plain() {
std::vector<std::string> separator = {" ", "\n", "\n", ""};
std::vector<std::string> prefix = {""};
std::vector<std::string> suffix = {""};
return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "", "", ' ');
}
static inline const TensorIOFormat Native() {
std::vector<std::string> separator = {", ", ",\n", "\n"};
std::vector<std::string> prefix = {"", "{"};
std::vector<std::string> suffix = {"", "}"};
return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "{", "}", ' ');
}
static inline const TensorIOFormat Legacy() {
TensorIOFormat LegacyFormat(StreamPrecision, 0, "", "", ' ');
LegacyFormat.legacy_bit = true;
return LegacyFormat;
}
std::string tenPrefix;
std::string tenSuffix;
std::vector<std::string> prefix;
std::vector<std::string> suffix;
std::vector<std::string> separator;
char fill;
int precision;
int flags;
std::vector<std::string> spacer{};
bool legacy_bit = false;
};
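A short usage sketch of these predefined formats follows, assuming a small rank-2 tensor and the new TensorBase::format() member that the operator<< at the end of this header relies on; the values are made up:

// Streaming a tensor with the predefined formats; plain operator<< defaults
// to TensorIOFormat::Plain().
#include <iostream>
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> t(2, 3);
  t.setValues({{1, 2, 3}, {4, 5, 6}});
  std::cout << t << "\n";                                          // Plain() by default
  std::cout << t.format(Eigen::TensorIOFormat::Numpy()) << "\n";   // bracketed, NumPy-like
  std::cout << t.format(Eigen::TensorIOFormat::Legacy()) << "\n";  // pre-existing 2-D layout
  return 0;
}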
template <typename T, int Layout, int rank>
class TensorWithFormat;
// specialize for Layout=ColMajor, Layout=RowMajor and rank=0.
template <typename T, int rank>
class TensorWithFormat<T, RowMajor, rank> {
public:
TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, RowMajor, rank>& wf) {
// Evaluate the expression if needed
typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
TensorForcedEvalOp<const T> eval = wf.t_tensor.eval();
Evaluator tensor(eval, DefaultDevice());
tensor.evalSubExprsIfNeeded(NULL);
internal::TensorPrinter<Evaluator, rank>::run(os, tensor, wf.t_format);
// Cleanup.
tensor.cleanup();
return os;
}
protected:
T t_tensor;
TensorIOFormat t_format;
};
template <typename T, int rank>
class TensorWithFormat<T, ColMajor, rank> {
public:
TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, rank>& wf) {
// Switch to RowMajor storage and print afterwards
typedef typename T::Index IndexType;
std::array<IndexType, rank> shuffle;
std::array<IndexType, rank> id;
std::iota(id.begin(), id.end(), IndexType(0));
std::copy(id.begin(), id.end(), shuffle.rbegin());
auto tensor_row_major = wf.t_tensor.swap_layout().shuffle(shuffle);
// Evaluate the expression if needed
typedef TensorEvaluator<const TensorForcedEvalOp<const decltype(tensor_row_major)>, DefaultDevice> Evaluator;
TensorForcedEvalOp<const decltype(tensor_row_major)> eval = tensor_row_major.eval();
Evaluator tensor(eval, DefaultDevice());
tensor.evalSubExprsIfNeeded(NULL);
internal::TensorPrinter<Evaluator, rank>::run(os, tensor, wf.t_format);
// Cleanup.
tensor.cleanup();
return os;
}
protected:
T t_tensor;
TensorIOFormat t_format;
};
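The column-major path above relies on a reversed identity permutation before printing; the snippet below only demonstrates that iota-into-reverse-iterator trick in isolation, with rank 3 as an arbitrary example:

// Building the reversed-identity shuffle used by swap_layout().shuffle(...).
#include <algorithm>
#include <array>
#include <cassert>
#include <numeric>

int main() {
  constexpr std::size_t rank = 3;                      // arbitrary example rank
  std::array<long, rank> id, shuffle;
  std::iota(id.begin(), id.end(), 0L);                 // id      = {0, 1, 2}
  std::copy(id.begin(), id.end(), shuffle.rbegin());   // shuffle = {2, 1, 0}
  assert((shuffle == std::array<long, rank>{2, 1, 0}));
  return 0;
}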
template <typename T>
std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) {
typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
typedef typename Evaluator::Dimensions Dimensions;
class TensorWithFormat<T, ColMajor, 0> {
public:
TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
// Evaluate the expression if needed
TensorForcedEvalOp<const T> eval = expr.eval();
Evaluator tensor(eval, DefaultDevice());
tensor.evalSubExprsIfNeeded(NULL);
friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, 0>& wf) {
// Evaluate the expression if needed
typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
TensorForcedEvalOp<const T> eval = wf.t_tensor.eval();
Evaluator tensor(eval, DefaultDevice());
tensor.evalSubExprsIfNeeded(NULL);
internal::TensorPrinter<Evaluator, 0>::run(os, tensor, wf.t_format);
// Cleanup.
tensor.cleanup();
return os;
}
// Print the result
static const int rank = internal::array_size<Dimensions>::value;
internal::TensorPrinter<Evaluator, rank>::run(os, tensor);
protected:
T t_tensor;
TensorIOFormat t_format;
};
// Cleanup.
tensor.cleanup();
return os;
namespace internal {
template <typename Tensor, std::size_t rank>
struct TensorPrinter {
static void run(std::ostream& s, const Tensor& _t, const TensorIOFormat& fmt) {
typedef std::remove_const_t<typename Tensor::Scalar> Scalar;
typedef typename Tensor::Index IndexType;
static const int layout = Tensor::Layout;
// backwards compatibility case: print tensor after reshaping to matrix of size dim(0) x
// (dim(1)*dim(2)*...*dim(rank-1)).
if (fmt.legacy_bit) {
const IndexType total_size = internal::array_prod(_t.dimensions());
if (total_size > 0) {
const IndexType first_dim = Eigen::internal::array_get<0>(_t.dimensions());
Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(_t.data(), first_dim,
total_size / first_dim);
s << matrix;
return;
}
}
eigen_assert(layout == RowMajor);
typedef std::conditional_t<is_same<Scalar, char>::value || is_same<Scalar, unsigned char>::value ||
is_same<Scalar, numext::int8_t>::value || is_same<Scalar, numext::uint8_t>::value,
int,
std::conditional_t<is_same<Scalar, std::complex<char> >::value ||
is_same<Scalar, std::complex<unsigned char> >::value ||
is_same<Scalar, std::complex<numext::int8_t> >::value ||
is_same<Scalar, std::complex<numext::uint8_t> >::value,
std::complex<int>, const Scalar&>> PrintType;
const IndexType total_size = array_prod(_t.dimensions());
std::streamsize explicit_precision;
if (fmt.precision == StreamPrecision) {
explicit_precision = 0;
} else if (fmt.precision == FullPrecision) {
if (NumTraits<Scalar>::IsInteger) {
explicit_precision = 0;
} else {
explicit_precision = significant_decimals_impl<Scalar>::run();
}
} else {
explicit_precision = fmt.precision;
}
std::streamsize old_precision = 0;
if (explicit_precision) old_precision = s.precision(explicit_precision);
IndexType width = 0;
bool align_cols = !(fmt.flags & DontAlignCols);
if (align_cols) {
// compute the largest width
for (IndexType i = 0; i < total_size; i++) {
std::stringstream sstr;
sstr.copyfmt(s);
sstr << static_cast<PrintType>(_t.data()[i]);
width = std::max<IndexType>(width, IndexType(sstr.str().length()));
}
}
std::streamsize old_width = s.width();
char old_fill_character = s.fill();
s << fmt.tenPrefix;
for (IndexType i = 0; i < total_size; i++) {
std::array<bool, rank> is_at_end{};
std::array<bool, rank> is_at_begin{};
// is the ith element the end of a coeff (always true), of a row, of a matrix, ...?
for (std::size_t k = 0; k < rank; k++) {
if ((i + 1) % (std::accumulate(_t.dimensions().rbegin(), _t.dimensions().rbegin() + k, 1,
std::multiplies<IndexType>())) ==
0) {
is_at_end[k] = true;
}
}
// is the ith element the beginning of a coeff (always true), of a row, of a matrix, ...?
for (std::size_t k = 0; k < rank; k++) {
if (i % (std::accumulate(_t.dimensions().rbegin(), _t.dimensions().rbegin() + k, 1,
std::multiplies<IndexType>())) ==
0) {
is_at_begin[k] = true;
}
}
// do we have a line break?
bool is_at_begin_after_newline = false;
for (std::size_t k = 0; k < rank; k++) {
if (is_at_begin[k]) {
std::size_t separator_index = (k < fmt.separator.size()) ? k : fmt.separator.size() - 1;
if (fmt.separator[separator_index].find('\n') != std::string::npos) {
is_at_begin_after_newline = true;
}
}
}
bool is_at_end_before_newline = false;
for (std::size_t k = 0; k < rank; k++) {
if (is_at_end[k]) {
std::size_t separator_index = (k < fmt.separator.size()) ? k : fmt.separator.size() - 1;
if (fmt.separator[separator_index].find('\n') != std::string::npos) {
is_at_end_before_newline = true;
}
}
}
std::stringstream suffix, prefix, separator;
for (std::size_t k = 0; k < rank; k++) {
std::size_t suffix_index = (k < fmt.suffix.size()) ? k : fmt.suffix.size() - 1;
if (is_at_end[k]) {
suffix << fmt.suffix[suffix_index];
}
}
for (std::size_t k = 0; k < rank; k++) {
std::size_t separator_index = (k < fmt.separator.size()) ? k : fmt.separator.size() - 1;
if (is_at_end[k] &&
(!is_at_end_before_newline || fmt.separator[separator_index].find('\n') != std::string::npos)) {
separator << fmt.separator[separator_index];
}
}
for (std::size_t k = 0; k < rank; k++) {
std::size_t spacer_index = (k < fmt.spacer.size()) ? k : fmt.spacer.size() - 1;
if (i != 0 && is_at_begin_after_newline && (!is_at_begin[k] || k == 0)) {
prefix << fmt.spacer[spacer_index];
}
}
for (int k = rank - 1; k >= 0; k--) {
std::size_t prefix_index = (static_cast<std::size_t>(k) < fmt.prefix.size()) ? k : fmt.prefix.size() - 1;
if (is_at_begin[k]) {
prefix << fmt.prefix[prefix_index];
}
}
s << prefix.str();
if (width) {
s.fill(fmt.fill);
s.width(width);
s << std::right;
}
s << _t.data()[i];
s << suffix.str();
if (i < total_size - 1) {
s << separator.str();
}
}
s << fmt.tenSuffix;
if (explicit_precision) s.precision(old_precision);
if (width) {
s.fill(old_fill_character);
s.width(old_width);
}
}
};
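The is_at_end bookkeeping above boils down to modular arithmetic on the flat index; a hypothetical row-major 2x3 tensor makes the pattern concrete:

// Modular-arithmetic sketch of the is_at_end test for a row-major 2x3 tensor
// (flat indices 0..5).
#include <cassert>

int main() {
  const int dims[2] = {2, 3};
  const int row_len = dims[1];           // product of the trailing dimension
  const int total = dims[0] * dims[1];   // product of all dimensions
  assert((2 + 1) % row_len == 0);        // flat index 2 closes the first row
  assert((4 + 1) % row_len != 0);        // flat index 4 is mid-row
  assert((5 + 1) % total == 0);          // flat index 5 closes the whole tensor
  return 0;
}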
template <typename Tensor>
struct TensorPrinter<Tensor, 0> {
static void run(std::ostream& s, const Tensor& _t, const TensorIOFormat& fmt) {
typedef typename Tensor::Scalar Scalar;
std::streamsize explicit_precision;
if (fmt.precision == StreamPrecision) {
explicit_precision = 0;
} else if (fmt.precision == FullPrecision) {
if (NumTraits<Scalar>::IsInteger) {
explicit_precision = 0;
} else {
explicit_precision = significant_decimals_impl<Scalar>::run();
}
} else {
explicit_precision = fmt.precision;
}
std::streamsize old_precision = 0;
if (explicit_precision) old_precision = s.precision(explicit_precision);
s << fmt.tenPrefix << _t.coeff(0) << fmt.tenSuffix;
if (explicit_precision) s.precision(old_precision);
}
};
} // end namespace internal
template <typename T>
std::ostream& operator<<(std::ostream& s, const TensorBase<T, ReadOnlyAccessors>& t) {
s << t.format(TensorIOFormat::Plain());
return s;
}
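For orientation, a minimal usage sketch of the streaming operator defined above (my own example, not part of the diff): plain operator<< forwards to format() with TensorIOFormat::Plain(), so the two calls below print the same thing. Tensor rank, sizes, and values are made up for illustration.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> t(2, 3);                        // hypothetical 2x3 tensor
  t.setValues({{1, 2, 3}, {4, 5, 6}});
  std::cout << t << "\n";                                 // uses TensorIOFormat::Plain() via operator<<
  std::cout << t.format(Eigen::TensorIOFormat::Plain());  // equivalent explicit call
  std::cout << "\n";
  return 0;
}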
} // end namespace Eigen
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H
#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorImagePatch
@@ -31,14 +33,14 @@ namespace internal {
template<DenseIndex Rows, DenseIndex Cols, typename XprType>
struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType>
{
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef traits<XprType> XprTraits;
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions + 1;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions + 1;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -187,7 +189,7 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
Scalar padding_value() const { return m_padding_value; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -215,25 +217,25 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
{
typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static const int NumDims = NumInputDims + 1;
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = NumInputDims + 1;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>,
Device> Self;
typedef TensorEvaluator<ArgType, Device> Impl;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};
@@ -447,7 +449,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) {
@@ -540,7 +541,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i);

View File

@@ -10,10 +10,7 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES
#define EIGEN_HAS_INDEX_LIST
#include "./InternalHeaderCheck.h"
namespace Eigen {
@@ -246,7 +243,7 @@ struct tuple_coeff {
template <typename... T>
EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const Index i, const IndexTuple<T...>& t) {
return ((i == Idx) & is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value) ||
return ((i == Idx) && is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value) ||
tuple_coeff<Idx-1, ValueT>::value_known_statically(i, t);
}
@@ -308,6 +305,11 @@ struct IndexList : internal::IndexTuple<FirstType, OtherTypes...> {
return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, Index>::set(i, *this, value);
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr std::size_t size() const {
return 1 + sizeof...(OtherTypes);
};
EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { }
EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple<FirstType, OtherTypes...>(first, other...) { }
EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple<FirstType, OtherTypes...>() { }
@@ -379,10 +381,10 @@ template<typename FirstType, typename... OtherTypes> struct array_size<const Ind
};
template<typename FirstType, typename... OtherTypes> struct array_size<IndexPairList<FirstType, OtherTypes...> > {
static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
static const size_t value = 1 + sizeof...(OtherTypes);
};
template<typename FirstType, typename... OtherTypes> struct array_size<const IndexPairList<FirstType, OtherTypes...> > {
static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
static const size_t value = 1 + sizeof...(OtherTypes);
};
template<Index N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr Index array_get(IndexList<FirstType, OtherTypes...>& a) {
@@ -468,7 +470,7 @@ struct index_statically_eq_impl {
template <typename FirstType, typename... OtherTypes>
struct index_statically_eq_impl<IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) == value);
}
};
@@ -476,7 +478,7 @@ struct index_statically_eq_impl<IndexList<FirstType, OtherTypes...> > {
template <typename FirstType, typename... OtherTypes>
struct index_statically_eq_impl<const IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) == value);
}
};
@@ -492,7 +494,7 @@ struct index_statically_ne_impl {
template <typename FirstType, typename... OtherTypes>
struct index_statically_ne_impl<IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) != value);
}
};
@@ -500,7 +502,7 @@ struct index_statically_ne_impl<IndexList<FirstType, OtherTypes...> > {
template <typename FirstType, typename... OtherTypes>
struct index_statically_ne_impl<const IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) != value);
}
};
@@ -516,7 +518,7 @@ struct index_statically_gt_impl {
template <typename FirstType, typename... OtherTypes>
struct index_statically_gt_impl<IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) > value);
}
};
@@ -524,7 +526,7 @@ struct index_statically_gt_impl<IndexList<FirstType, OtherTypes...> > {
template <typename FirstType, typename... OtherTypes>
struct index_statically_gt_impl<const IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) > value);
}
};
@@ -541,7 +543,7 @@ struct index_statically_lt_impl {
template <typename FirstType, typename... OtherTypes>
struct index_statically_lt_impl<IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) < value);
}
};
@@ -549,7 +551,7 @@ struct index_statically_lt_impl<IndexList<FirstType, OtherTypes...> > {
template <typename FirstType, typename... OtherTypes>
struct index_statically_lt_impl<const IndexList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexList<FirstType, OtherTypes...>().get(i) < value);
}
};
@@ -566,7 +568,7 @@ struct index_pair_first_statically_eq_impl {
template <typename FirstType, typename... OtherTypes>
struct index_pair_first_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value);
}
};
@@ -574,7 +576,7 @@ struct index_pair_first_statically_eq_impl<IndexPairList<FirstType, OtherTypes..
template <typename FirstType, typename... OtherTypes>
struct index_pair_first_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value);
}
};
@@ -591,7 +593,7 @@ struct index_pair_second_statically_eq_impl {
template <typename FirstType, typename... OtherTypes>
struct index_pair_second_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value);
}
};
@@ -599,7 +601,7 @@ struct index_pair_second_statically_eq_impl<IndexPairList<FirstType, OtherTypes.
template <typename FirstType, typename... OtherTypes>
struct index_pair_second_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > {
EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) {
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &&
(IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value);
}
};
@@ -608,81 +610,6 @@ struct index_pair_second_statically_eq_impl<const IndexPairList<FirstType, Other
} // end namespace internal
} // end namespace Eigen
#else
namespace Eigen {
namespace internal {
template <typename T>
struct index_known_statically_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const Index) {
return false;
}
};
template <typename T>
struct all_indices_known_statically_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
return false;
}
};
template <typename T>
struct indices_statically_known_to_increase_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
return false;
}
};
template <typename T>
struct index_statically_eq_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
return false;
}
};
template <typename T>
struct index_statically_ne_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
return false;
}
};
template <typename T>
struct index_statically_gt_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
return false;
}
};
template <typename T>
struct index_statically_lt_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
return false;
}
};
template <typename Tx>
struct index_pair_first_statically_eq_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
return false;
}
};
template <typename Tx>
struct index_pair_second_statically_eq_impl {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) {
return false;
}
};
} // end namespace internal
} // end namespace Eigen
#endif
namespace Eigen {
namespace internal {

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorInflation
@@ -28,9 +30,9 @@ struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -66,7 +68,7 @@ class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>,
const Strides& strides() const { return m_strides; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -80,21 +82,21 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
{
typedef TensorInflationOp<Strides, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -202,7 +204,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i);

View File

@@ -10,10 +10,10 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
#if EIGEN_HAS_VARIADIC_TEMPLATES
#include <initializer_list>
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorInitializer
@@ -77,6 +77,4 @@ void initialize_tensor(TensorEvaluator<Derived, DefaultDevice>& tensor,
} // namespace internal
} // namespace Eigen
#endif // EIGEN_HAS_VARIADIC_TEMPLATES
#endif // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H

View File

@@ -11,6 +11,8 @@
#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \internal
@@ -28,12 +30,10 @@ namespace Eigen {
namespace internal {
namespace {
// Note: result is undefined if val == 0
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
std::enable_if_t<sizeof(T)==4,int> count_leading_zeros(const T val)
{
#ifdef EIGEN_GPU_COMPILE_PHASE
return __clz(val);
@@ -51,7 +51,7 @@ namespace {
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
std::enable_if_t<sizeof(T)==8,int> count_leading_zeros(const T val)
{
#ifdef EIGEN_GPU_COMPILE_PHASE
return __clzll(val);
@@ -79,13 +79,13 @@ namespace {
template <typename T>
struct UnsignedTraits {
typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type;
typedef std::conditional_t<sizeof(T) == 8, uint64_t, uint32_t> type;
};
template <typename T>
struct DividerTraits {
typedef typename UnsignedTraits<T>::type type;
static const int N = sizeof(T) * 8;
static constexpr int N = sizeof(T) * 8;
};
template <typename T>
@@ -135,8 +135,6 @@ namespace {
#endif
}
};
}
template <typename T, bool div_gt_one = false>
struct TensorIntDivisor {
@@ -252,7 +250,7 @@ private:
template <typename T, bool div_gt_one>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
return divisor.divide(numerator);
}

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorLayoutSwap
@@ -43,9 +45,9 @@ struct traits<TensorLayoutSwapOp<XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = traits<XprType>::NumDimensions;
static const int Layout = (traits<XprType>::Layout == ColMajor) ? RowMajor : ColMajor;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = traits<XprType>::NumDimensions;
static constexpr int Layout = (traits<XprType>::Layout == ColMajor) ? RowMajor : ColMajor;
typedef typename XprTraits::PointerType PointerType;
};
@@ -72,7 +74,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
typedef TensorBase<TensorLayoutSwapOp<XprType>, WriteAccessors> Base;
typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename Eigen::internal::nested<TensorLayoutSwapOp>::type Nested;
typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Index Index;
@@ -81,7 +83,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
: m_xpr(expr) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorLayoutSwapOp)
@@ -96,15 +98,15 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
{
typedef TensorLayoutSwapOp<ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
static constexpr int Layout = (TensorEvaluator<ArgType, Device>::Layout == static_cast<int>(ColMajor)) ? RowMajor : ColMajor;
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
CoordAccess = false, // to be implemented
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
};
@@ -178,12 +180,12 @@ template<typename ArgType, typename Device>
typedef TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> Base;
typedef TensorLayoutSwapOp<ArgType> XprType;
static constexpr int Layout = (TensorEvaluator<ArgType, Device>::Layout == static_cast<int>(ColMajor)) ? RowMajor : ColMajor;
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
CoordAccess = false // to be implemented
};

View File

@@ -14,7 +14,7 @@
/** use this macro in sfinae selection in templated functions
*
* template<typename T,
* typename std::enable_if< isBanana<T>::value , int >::type = 0
* std::enable_if_t< isBanana<T>::value , int > = 0
* >
* void foo(){}
*
@@ -26,22 +26,8 @@
* void foo(){}
*/
// SFINAE requires variadic templates
#if !defined(EIGEN_GPUCC)
#if EIGEN_HAS_VARIADIC_TEMPLATES
// SFINAE doesn't work for gcc <= 4.7
#ifdef EIGEN_COMP_GNUC
#if EIGEN_GNUC_AT_LEAST(4,8)
#define EIGEN_HAS_SFINAE
#endif
#else
#define EIGEN_HAS_SFINAE
#endif
#endif
#endif
#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \
typename internal::enable_if< ( __condition__ ) , int >::type = 0
std::enable_if_t< ( __condition__ ) , int > = 0
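As a quick aside, here is a self-contained sketch of the selection pattern this macro documents, written directly against std::enable_if_t so it compiles without any Eigen headers (the function names are made up):

#include <iostream>
#include <type_traits>

template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
void foo(T) { std::cout << "integral overload\n"; }

template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
void foo(T) { std::cout << "floating-point overload\n"; }

int main() {
  foo(3);    // picks the integral overload
  foo(3.5);  // picks the floating-point overload
  return 0;
}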
// Define a macro to use a reference on the host but a value on the device
#if defined(SYCL_DEVICE_ONLY)

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H
#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
// FIXME use proper doxygen documentation (e.g. \tparam MakePointer_)
@@ -32,7 +34,7 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
typedef TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> > Base;
#ifdef EIGEN_USE_SYCL
typedef typename Eigen::internal::remove_reference<typename Eigen::internal::nested<Self>::type>::type Nested;
typedef std::remove_reference_t<typename Eigen::internal::nested<Self>::type> Nested;
#else
typedef typename Eigen::internal::nested<Self>::type Nested;
#endif
@@ -49,29 +51,29 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
// example in TensorMap<Tensor<const Scalar, ...>> expression. This type of
// expression should be illegal, but adding this restriction is not possible
// in practice (see https://bitbucket.org/eigen/eigen/pull-requests/488).
typedef typename internal::conditional<
typedef std::conditional_t<
bool(internal::is_lvalue<PlainObjectType>::value),
PointerType, // use simple pointer in lvalue expressions
PointerConstType // use const pointer in rvalue expressions
>::type StoragePointerType;
> StoragePointerType;
// If TensorMap was constructed over rvalue expression (e.g. const Tensor),
// we should return a reference to const from operator() (and others), even
// if TensorMap itself is not const.
typedef typename internal::conditional<
typedef std::conditional_t<
bool(internal::is_lvalue<PlainObjectType>::value),
Scalar&,
const Scalar&
>::type StorageRefType;
> StorageRefType;
static const int Options = Options_;
static constexpr int Options = Options_;
static const Index NumIndices = PlainObjectType::NumIndices;
static constexpr Index NumIndices = PlainObjectType::NumIndices;
typedef typename PlainObjectType::Dimensions Dimensions;
static constexpr int Layout = PlainObjectType::Layout;
enum {
IsAligned = ((int(Options_)&Aligned)==Aligned),
Layout = PlainObjectType::Layout,
CoordAccess = true,
RawAccess = true
};
@@ -82,35 +84,11 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) {
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) {
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) {
EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) {
EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) {
EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) {
EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, const array<Index, NumIndices>& dimensions)
: m_data(dataPtr), m_dimensions(dimensions)
@@ -165,7 +143,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
return m_data[index];
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
{
@@ -179,52 +156,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
return m_data[index];
}
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1) const
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i1 + i0 * m_dimensions[1];
return m_data[index];
} else {
const Index index = i0 + i1 * m_dimensions[0];
return m_data[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2) const
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
return m_data[index];
} else {
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
return m_data[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3) const
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
return m_data[index];
} else {
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
return m_data[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
return m_data[index];
} else {
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
return m_data[index];
}
}
#endif
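The fixed-arity overloads removed above spell out the linearization that the remaining variadic path computes; a standalone sketch of the same 3-D formulas (plain C++, extents and indices made up):

#include <cstddef>
#include <iostream>

int main() {
  const std::size_t dim[3] = {2, 3, 4};          // extents d0, d1, d2
  const std::size_t i0 = 1, i1 = 0, i2 = 2;
  // Row-major nests from the last dimension outward (cf. the removed operator()):
  const std::size_t row_major = i2 + dim[2] * (i1 + dim[1] * i0);   // 2 + 4*(0 + 3*1) = 14
  // Column-major nests from the first dimension outward:
  const std::size_t col_major = i0 + dim[0] * (i1 + dim[1] * i2);   // 1 + 2*(0 + 3*2) = 13
  std::cout << row_major << " " << col_major << "\n";
  return 0;
}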
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(const array<Index, NumIndices>& indices)
@@ -253,7 +184,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
return m_data[index];
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
{
@@ -268,52 +198,6 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
return m_data[index];
}
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1)
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i1 + i0 * m_dimensions[1];
return m_data[index];
} else {
const Index index = i0 + i1 * m_dimensions[0];
return m_data[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2)
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
return m_data[index];
} else {
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
return m_data[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3)
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
return m_data[index];
} else {
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
return m_data[index];
}
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
{
if (PlainObjectType::Options&RowMajor) {
const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
return m_data[index];
} else {
const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
return m_data[index];
}
}
#endif
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorMap)

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H
#define EIGEN_CXX11_TENSOR_TENSOR_META_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
template<bool cond> struct Cond {};
@@ -28,13 +30,15 @@ const T2& choose(Cond<false>, const T1&, const T2& second) {
template <typename T, typename X, typename Y>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T divup(const X x, const Y y) {
return static_cast<T>((x + y - 1) / y);
// Note: This form is used because it cannot overflow.
return static_cast<T>(x == 0 ? 0 : (x - 1) / y + 1);
}
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T divup(const T x, const T y) {
return static_cast<T>((x + y - 1) / y);
// Note: This form is used because it cannot overflow.
return static_cast<T>(x == 0 ? 0 : (x - 1) / y + 1);
}
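The overflow note above is easy to verify with a small standalone check (my own example; std::uint32_t is chosen so the wrap-around is visible): near the top of the type's range the old form x + y - 1 wraps, while the rewritten form stays correct.

#include <cstdint>
#include <iostream>

std::uint32_t divup_old(std::uint32_t x, std::uint32_t y) { return (x + y - 1) / y; }
std::uint32_t divup_new(std::uint32_t x, std::uint32_t y) { return x == 0 ? 0 : (x - 1) / y + 1; }

int main() {
  const std::uint32_t x = 4294967295u;    // 2^32 - 1
  std::cout << divup_old(x, 16) << "\n";  // 0: the addition wrapped around
  std::cout << divup_new(x, 16) << "\n";  // 268435456: the true ceiling of x/16
  return 0;
}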
template <size_t n> struct max_n_1 {
@@ -52,7 +56,7 @@ struct PacketType : internal::packet_traits<Scalar> {
};
// For CUDA packet types when using a GpuDevice
#if defined(EIGEN_USE_GPU) && defined(EIGEN_HAS_GPU_FP16)
#if defined(EIGEN_USE_GPU) && defined(EIGEN_HAS_GPU_FP16) && defined(EIGEN_GPU_COMPILE_PHASE)
typedef ulonglong2 Packet4h2;
template<>
@@ -118,13 +122,13 @@ struct static_for<Index, end, end, step, StepOp> {
template <typename OutScalar, typename Device, bool Vectorizable>
struct Vectorise {
static const int PacketSize = 1;
static constexpr int PacketSize = 1;
typedef OutScalar PacketReturnType;
};
template <typename OutScalar, typename Device>
struct Vectorise<OutScalar, Device, true> {
static const int PacketSize = Eigen::PacketType<OutScalar, Device>::size;
static constexpr int PacketSize = Eigen::PacketType<OutScalar, Device>::size;
typedef typename Eigen::PacketType<OutScalar, Device>::type PacketReturnType;
};
@@ -207,9 +211,11 @@ template<> struct PacketType<const half, const SyclDevice>: PacketType<half, Syc
#endif
#endif
// Tuple mimics std::pair but works on e.g. nvcc.
template <typename U, typename V> struct Tuple {
// Pair mimics std::pair but works on e.g. nvcc.
template <typename U, typename V> struct Pair {
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
U first;
V second;
@@ -217,13 +223,13 @@ template <typename U, typename V> struct Tuple {
typedef V second_type;
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Tuple() : first(), second() {}
Pair() : first(), second() {}
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Tuple(const U& f, const V& s) : first(f), second(s) {}
Pair(const U& f, const V& s) : first(f), second(s) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void swap(Tuple& rhs) {
void swap(Pair& rhs) {
using numext::swap;
swap(first, rhs.first);
swap(second, rhs.second);
@@ -232,13 +238,13 @@ template <typename U, typename V> struct Tuple {
template <typename U, typename V>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) {
bool operator==(const Pair<U, V>& x, const Pair<U, V>& y) {
return (x.first == y.first && x.second == y.second);
}
template <typename U, typename V>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) {
bool operator!=(const Pair<U, V>& x, const Pair<U, V>& y) {
return !(x == y);
}
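A hedged usage sketch of the renamed type (assuming Pair keeps Tuple's placement directly in namespace Eigen, as the surrounding hunks suggest): first/second access, member-wise ==, and swap behave as defined above.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Pair<int, float> a(1, 2.5f), b(1, 2.5f);    // namespace placement assumed
  std::cout << (a == b) << "\n";                     // 1: member-wise comparison
  Eigen::Pair<int, float> c(3, 4.0f);
  a.swap(c);                                         // swaps both members
  std::cout << a.first << " " << a.second << "\n";   // 3 4
  return 0;
}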
@@ -258,13 +264,12 @@ template <typename Idx> struct IndexPair {
};
#ifdef EIGEN_HAS_SFINAE
namespace internal {
template<typename IndexType, typename Index, Index... Is>
template<typename IndexType, typename Index, Index First, Index... Is>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) {
return { idx[Is]... };
array<Index, 1 + sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, First, Is...>) {
return { idx[First], idx[Is]... };
}
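The helper above expands an index pack into a brace-initializer; a standalone sketch of the same idea using std::index_sequence instead of Eigen's numeric_list (names are made up):

#include <array>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

template <typename IndexType, std::size_t... Is>
std::array<long, sizeof...(Is)> toArray(const IndexType& idx, std::index_sequence<Is...>) {
  return {{ static_cast<long>(idx[Is])... }};        // same pack expansion as customIndices2Array
}

int main() {
  std::vector<long> v = {7, 8, 9};
  const auto a = toArray(v, std::make_index_sequence<3>{});
  std::cout << a[0] << " " << a[1] << " " << a[2] << "\n";   // 7 8 9
  return 0;
}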
template<typename IndexType, typename Index>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -302,9 +307,6 @@ namespace internal {
};
}
#endif
} // namespace Eigen

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorReshaping
@@ -28,9 +30,9 @@ struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprTyp
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = array_size<NewDimensions>::value;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = array_size<NewDimensions>::value;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -56,7 +58,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
public:
typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
@@ -68,7 +70,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
const NewDimensions& dimensions() const { return m_dims; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)
@@ -92,10 +94,10 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
static const int NumOutputDims = internal::array_size<Dimensions>::value;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumOutputDims = internal::array_size<Dimensions>::value;
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
enum ReshapingKind {
// We do not use layout information to determine reshaping kind.
@@ -107,15 +109,12 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
// clang-format off
static const ReshapingKind kind =
#if defined(EIGEN_HAS_INDEX_LIST)
(NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN
: (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne
: Runtime;
#else
Runtime;
#endif
// clang-format on
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
@@ -125,12 +124,11 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess &&
NumInputDims > 0 && NumOutputDims > 0,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
@@ -245,12 +243,12 @@ template<typename NewDimensions, typename ArgType, typename Device>
typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
typedef NewDimensions Dimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
};
@@ -283,7 +281,7 @@ template<typename NewDimensions, typename ArgType, typename Device>
template <typename TensorBlock>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
const TensorBlockDesc& desc, const TensorBlock& block) {
assert(this->m_impl.data() != NULL);
eigen_assert(this->m_impl.data() != NULL);
typedef typename TensorBlock::XprType TensorBlockExpr;
typedef internal::TensorBlockAssignment<
@@ -315,9 +313,9 @@ struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<Xp
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = array_size<StartIndices>::value;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = array_size<StartIndices>::value;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -357,7 +355,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
const Sizes& sizes() const { return m_sizes; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)
@@ -369,8 +367,9 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
};
namespace internal {
// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
@@ -400,14 +399,14 @@ template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index
};
#endif
}
} // namespace internal
// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
static const int NumDims = internal::array_size<Sizes>::value;
static constexpr int NumDims = internal::array_size<Sizes>::value;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
@@ -415,9 +414,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef Sizes Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
// Alignment can't be guaranteed at compile time since it depends on the
// slice offsets and sizes.
@@ -425,14 +425,13 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
// FIXME: Temporary workaround for bug in slicing of bool tensors.
!internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
!internal::is_same<std::remove_const_t<Scalar>, bool>::value,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -492,7 +491,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization
&& data && m_impl.data()) {
Index contiguous_values = 1;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -511,7 +510,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
}
// Use memcpy if it's going to be faster than using the regular evaluation.
const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
@@ -588,7 +587,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
return rslt;
}
else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
values[0] = m_impl.coeff(inputIndices[0]);
values[packetSize-1] = m_impl.coeff(inputIndices[1]);
EIGEN_UNROLL_LOOP
@@ -705,7 +704,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
static const int NumDims = internal::array_size<Sizes>::value;
static constexpr int NumDims = internal::array_size<Sizes>::value;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
@@ -713,17 +712,17 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef Sizes Dimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -811,9 +810,9 @@ struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprTyp
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = array_size<StartIndices>::value;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = array_size<StartIndices>::value;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -857,7 +856,7 @@ class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartI
const StartIndices& strides() const { return m_strides; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)
@@ -874,7 +873,7 @@ template<typename StartIndices, typename StopIndices, typename Strides, typename
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
static const int NumDims = internal::array_size<Strides>::value;
static constexpr int NumDims = internal::array_size<Strides>::value;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -883,6 +882,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
typedef typename Storage::Type EvaluatorPointerType;
typedef Strides Dimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
// Alignment can't be guaranteed at compile time since it depends on the
// slice offsets and sizes.
@@ -890,7 +890,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
PacketAccess = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
@@ -1060,14 +1059,14 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride
{
typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
static const int NumDims = internal::array_size<Strides>::value;
static constexpr int NumDims = internal::array_size<Strides>::value;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
RawAccess = false
};

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorPadding
@@ -28,9 +30,9 @@ struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprT
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -70,7 +72,7 @@ class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, Xpr
Scalar padding_value() const { return m_padding_value; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -86,26 +88,26 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
{
typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<PaddingDimensions>::value;
static constexpr int NumDims = internal::array_size<PaddingDimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = true,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = true,
RawAccess = false
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -510,35 +512,20 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
index < m_padding[dim_index].first) ||
(!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
return (index < m_padding[dim_index].first) ||
(index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
EIGEN_UNUSED_VARIABLE(dim_index);
return false;
#endif
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
EIGEN_UNUSED_VARIABLE(dim_index);
return false;
#endif
}
@@ -564,7 +551,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index initialIndex = index;
@@ -622,7 +608,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index initialIndex = index;
@@ -680,7 +665,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i);

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorPatch
@@ -28,9 +30,9 @@ struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions + 1;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions + 1;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -68,7 +70,7 @@ class TensorPatchOp : public TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOn
const PatchDim& patch_dims() const { return m_patch_dims; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -83,22 +85,21 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
{
typedef TensorPatchOp<PatchDim, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};
@@ -195,7 +196,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;

View File

@@ -11,12 +11,12 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
namespace {
EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t get_random_seed() {
#if defined(EIGEN_GPU_COMPILE_PHASE)
// We don't support 3d kernels since we currently only use 1 and
// 2d kernels.
@@ -29,7 +29,7 @@ EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
#endif
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state, uint64_t stream) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state, uint64_t stream) {
// TODO: Unify with the implementation in the non-blocking thread pool.
uint64_t current = *state;
// Update the internal state
@@ -38,14 +38,11 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint6
return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
seed = seed ? seed : get_random_seed();
return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
}
} // namespace
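For reference, a minimal stand-alone sketch of the PCG XSH-RS scheme used by the generators above; the output function and seeding constant match the lines shown in this hunk, while the stream-based state update is an assumption, since that line is elided here:
#include <cstdint>
// Illustrative only -- not the Eigen implementation.
inline unsigned pcg_xsh_rs_sketch(uint64_t* state, uint64_t stream) {
  const uint64_t current = *state;
  // LCG state update; deriving an odd increment from the stream is an assumption.
  *state = current * 6364136223846793005ULL + (stream << 1 | 1);
  // XSH-RS output: xorshift the high bits, then apply a data-dependent shift.
  return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}
inline uint64_t pcg_seed_state_sketch(uint64_t seed) {
  return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
}
// Usage: one state per generator instance, one stream per output element.
//   uint64_t s = pcg_seed_state_sketch(42);
//   unsigned r = pcg_xsh_rs_sketch(&s, /*stream=*/element_index);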
template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
T RandomToTypeUniform(uint64_t* state, uint64_t stream) {
unsigned rnd = PCG_XSH_RS_generator(state, stream);
@@ -123,7 +120,7 @@ std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state,
template <typename T> class UniformRandomGenerator {
public:
static const bool PacketAccess = true;
static constexpr bool PacketAccess = true;
// Uses the given "seed" if non-zero, otherwise uses a random seed.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
@@ -131,7 +128,7 @@ template <typename T> class UniformRandomGenerator {
m_state = PCG_XSH_RS_state(seed);
#ifdef EIGEN_USE_SYCL
// In SYCL it is not possible to build PCG_XSH_RS_state in one step.
// Therefor, we need two step to initializate the m_state.
// Therefore, we need two steps to initialize the m_state.
// In SYCL, the constructor of the functor is called on the CPU
// and we get the clock seed here from the CPU. However, this seed is
// the same for all the threads, since unlike CUDA the threadID, blockID, etc. are not global functions.
@@ -140,7 +137,7 @@ template <typename T> class UniformRandomGenerator {
// but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds
// the (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction
// similar to CUDA. Therefore, the thread ID injection is not available at this stage.
//However when the operator() is called the thread ID will be avilable. So inside the opeator,
// However, when the operator() is called the thread ID will be available. So inside the operator,
// we add the threadID, blockID, ... (which is the equivalent of i)
// to the seed and construct the unique m_state per thread, similar to CUDA.
m_exec_once =false;
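A rough sketch of the two-step construction this comment describes, assuming a hypothetical global_id value obtained inside operator(); the names are illustrative and not the SYCL API used here:
#include <cstdint>
// Illustrative only -- not the Eigen implementation.
struct TwoStepStateSketch {
  uint64_t m_state;
  bool m_exec_once = false;
  // Step 1 (host, functor constructor): only the clock-derived part of the
  // state can be built here, so it is identical for every thread.
  explicit TwoStepStateSketch(uint64_t clock_seed)
      : m_state(clock_seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL) {}
  // Step 2 (device, first call to operator()): each thread completes its own
  // state exactly once, as soon as its global id is known.
  void finish_on_device(uint64_t global_id) {
    if (!m_exec_once) {
      m_state += global_id * 6364136223846793005ULL;
      m_exec_once = true;
    }
  }
};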
@@ -237,20 +234,20 @@ std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state,
template <typename T> class NormalRandomGenerator {
public:
static const bool PacketAccess = true;
static constexpr bool PacketAccess = true;
// Uses the given "seed" if non-zero, otherwise uses a random seed.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
m_state = PCG_XSH_RS_state(seed);
#ifdef EIGEN_USE_SYCL
// In SYCL it is not possible to build PCG_XSH_RS_state in one step.
// Therefor, we need two steps to initializate the m_state.
// Therefore, we need two steps to initialize the m_state.
// In SYCL, the constructor of the functor is called on the CPU
// and we get the clock seed here from the CPU. However, this seed is
// the same for all the threads, since unlike CUDA the threadID, blockID, etc. are not global functions.
// and only available in the operator() function (which is called on the GPU).
// Therefore, the thread ID injection is not available at this stage. However, when the operator()
//is called the thread ID will be avilable. So inside the opeator,
//is called the thread ID will be available. So inside the operator,
// we add the threadID, blockID, ... (which is the equivalent of i)
// to the seed and construct the unique m_state per thread, similar to CUDA.
m_exec_once =false;

View File

@@ -21,6 +21,7 @@
#endif
#endif
#include "./InternalHeaderCheck.h"
namespace Eigen {
@@ -42,8 +43,8 @@ namespace internal {
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
static const int Layout = XprTraits::Layout;
static constexpr int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
template <class T> struct MakePointer {
@@ -107,7 +108,6 @@ struct preserve_inner_most_dims {
static const bool value = false;
};
#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES
template <typename ReducedDims, int NumTensorDims>
struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
@@ -136,7 +136,6 @@ struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
static const bool tmp2 = index_statically_lt<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
static const bool value = tmp1 & tmp2;
};
#endif
template <int DimIndex, typename Self, typename Op>
@@ -166,8 +165,12 @@ struct GenericDimReducer<-1, Self, Op> {
};
template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess),
bool UseTreeReduction = (!Self::ReducerTraits::IsStateful &&
!Self::ReducerTraits::IsExactlyAssociative)>
bool UseTreeReduction = (!Self::ReducerTraits::IsStateful &&
!Self::ReducerTraits::IsExactlyAssociative &&
// GPU threads can quickly run out of stack space
// for moderately sized inputs.
!Self::RunningOnGPU
)>
struct InnerMostDimReducer {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
typename Self::CoeffReturnType accum = reducer.initialize();
@@ -180,42 +183,77 @@ struct InnerMostDimReducer {
template <typename Self, typename Op>
struct InnerMostDimReducer<Self, Op, true, false> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
const typename Self::Index packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
typename Self::PacketReturnType paccum = reducer.template initializePacket<typename Self::PacketReturnType>();
for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum);
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer0) {
using Index = typename Self::Index;
constexpr Index packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
Index start = 0;
typename Self::PacketReturnType paccum0 = reducer0.template initializePacket<typename Self::PacketReturnType>();
if (!Self::ReducerTraits::IsStateful && numValuesToReduce >= 4*packetSize) {
const Index VectorizedSize4 = (numValuesToReduce / (4*packetSize)) * (4*packetSize);
typename Self::PacketReturnType paccum1 = reducer0.template initializePacket<typename Self::PacketReturnType>();
typename Self::PacketReturnType paccum2 = reducer0.template initializePacket<typename Self::PacketReturnType>();
typename Self::PacketReturnType paccum3 = reducer0.template initializePacket<typename Self::PacketReturnType>();
const Index offset0 = firstIndex;
const Index offset1 = firstIndex + packetSize;
const Index offset2 = firstIndex + 2*packetSize;
const Index offset3 = firstIndex + 3*packetSize;
for (Index j = 0; j < VectorizedSize4; j += 4*packetSize) {
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset0 + j), &paccum0);
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset1 + j), &paccum1);
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset2 + j), &paccum2);
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(offset3 + j), &paccum3);
}
reducer0.reducePacket(paccum1, &paccum0);
reducer0.reducePacket(paccum2, &paccum0);
reducer0.reducePacket(paccum3, &paccum0);
start = VectorizedSize4;
}
typename Self::CoeffReturnType accum = reducer.initialize();
for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
if (start <= (numValuesToReduce - packetSize)) {
const Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
for (Index j = start; j < VectorizedSize; j += packetSize) {
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum0);
}
start = VectorizedSize;
}
return reducer.finalizeBoth(accum, paccum);
typename Self::CoeffReturnType accum = reducer0.initialize();
for (Index j = start; j < numValuesToReduce; ++j) {
reducer0.reduce(self.m_impl.coeff(firstIndex + j), &accum);
}
return reducer0.finalizeBoth(accum, paccum0);
}
};
#if !defined(EIGEN_HIPCC)
static const int kLeafSize = 1024;
#if !defined(EIGEN_HIPCC)
// The following implements tree-based reduction, which improves the accuracy
// of sum and mean reductions, since each of the n inputs only participates in
// O(log n) additions.
template <typename T>
EIGEN_DEVICE_FUNC inline Index LeafSize() { return 1024; }
template <>
EIGEN_DEVICE_FUNC inline Index LeafSize<half>() { return 200; }
template <>
EIGEN_DEVICE_FUNC inline Index LeafSize<bfloat16>() { return 128; }
template <typename Self, typename Op>
struct InnerMostDimReducer<Self, Op, false, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
reduce(const Self& self, typename Self::Index firstIndex,
typename Self::Index numValuesToReduce, Op& reducer) {
const Index kLeafSize = LeafSize<typename Self::CoeffReturnType>();
typename Self::CoeffReturnType accum = reducer.initialize();
if (numValuesToReduce > kLeafSize) {
const typename Self::Index half = numValuesToReduce / 2;
// Recursively reduce the two halves.
reducer.reduce(reduce(self, firstIndex, half, reducer), &accum);
reducer.reduce(
reduce(self, firstIndex + half, numValuesToReduce - half, reducer),
&accum);
return reducer.finalize(accum);
} else {
for (typename Self::Index j = 0; j < numValuesToReduce; ++j) {
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
}
return InnerMostDimReducer<Self, Op, false, false>::reduce(self, firstIndex, numValuesToReduce, reducer);
}
return reducer.finalize(accum);
}
};
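The accuracy argument in the comment above can be illustrated with a stand-alone sketch of the same tree reduction over a plain float array; the leaf size and element type are assumptions:
#include <cstddef>
// Recursive pairwise (tree) summation: each input participates in O(log n)
// additions, which limits the growth of floating-point rounding error compared
// to a single left-to-right accumulation.
inline float tree_sum_sketch(const float* data, std::size_t n, std::size_t leaf_size = 1024) {
  if (n <= leaf_size) {
    float acc = 0.0f;
    for (std::size_t i = 0; i < n; ++i) acc += data[i];
    return acc;
  }
  const std::size_t half = n / 2;
  return tree_sum_sketch(data, half, leaf_size) +
         tree_sum_sketch(data + half, n - half, leaf_size);
}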
@@ -224,6 +262,7 @@ struct InnerMostDimReducer<Self, Op, true, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
reduce(const Self& self, typename Self::Index firstIndex,
typename Self::Index numValuesToReduce, Op& reducer) {
const Index kLeafSize = LeafSize<typename Self::CoeffReturnType>();
const typename Self::Index packetSize =
internal::unpacket_traits<typename Self::PacketReturnType>::size;
typename Self::CoeffReturnType accum = reducer.initialize();
@@ -242,36 +281,12 @@ struct InnerMostDimReducer<Self, Op, true, true> {
}
return reducer.finalize(accum);
} else {
const typename Self::Index UnrollSize =
(numValuesToReduce / (2*packetSize)) * 2*packetSize;
const typename Self::Index VectorizedSize =
(numValuesToReduce / packetSize) * packetSize;
typename Self::PacketReturnType paccum =
reducer.template initializePacket<typename Self::PacketReturnType>();
typename Self::PacketReturnType paccum2 =
reducer.template initializePacket<typename Self::PacketReturnType>();
for (typename Self::Index j = 0; j < UnrollSize; j += packetSize * 2) {
reducer.reducePacket(
self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum);
reducer.reducePacket(
self.m_impl.template packet<Unaligned>(firstIndex + j + packetSize),
&paccum2);
}
for (typename Self::Index j = UnrollSize; j < VectorizedSize; j+= packetSize) {
reducer.reducePacket(self.m_impl.template packet<Unaligned>(
firstIndex + j), &paccum);
}
reducer.reducePacket(paccum2, &paccum);
for (typename Self::Index j = VectorizedSize; j < numValuesToReduce;
++j) {
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
}
return reducer.finalizeBoth(accum, paccum);
return InnerMostDimReducer<Self, Op, true, false>::reduce(self, firstIndex, numValuesToReduce, reducer);
}
}
};
#endif
template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
struct InnerMostDimPreserver {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
@@ -292,10 +307,37 @@ struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
template <typename Self, typename Op>
struct InnerMostDimPreserver<0, Self, Op, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) {
const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0];
reducer.reducePacket(self.m_impl.template packet<Unaligned>(input), accum);
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer0, typename Self::PacketReturnType* accum0) {
using Index = typename Self::Index;
const Index stride = self.m_reducedStrides[0];
const Index size = self.m_reducedDims[0];
if (!Self::ReducerTraits::IsStateful && size >= 16) {
const Index unrolled_size4 = (size / 4) * 4;
typename Self::PacketReturnType accum1 = reducer0.template initializePacket<typename Self::PacketReturnType>();
typename Self::PacketReturnType accum2 = reducer0.template initializePacket<typename Self::PacketReturnType>();
typename Self::PacketReturnType accum3 = reducer0.template initializePacket<typename Self::PacketReturnType>();
for (Index j = 0; j < unrolled_size4; j += 4) {
const Index input0 = firstIndex + j * stride;
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input0), accum0);
const Index input1 = firstIndex + (j+1) * stride;
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input1), &accum1);
const Index input2 = firstIndex + (j+2) * stride;
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input2), &accum2);
const Index input3 = firstIndex + (j+3) * stride;
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input3), &accum3);
}
reducer0.reducePacket(accum1, accum0);
reducer0.reducePacket(accum2, accum0);
reducer0.reducePacket(accum3, accum0);
for (Index j = unrolled_size4; j < size; ++j) {
Index input = firstIndex + j * stride;
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input), accum0);
}
} else {
for (Index j = 0; j < size; ++j) {
Index input = firstIndex + j * stride;
reducer0.reducePacket(self.m_impl.template packet<Unaligned>(input), accum0);
}
}
}
};
@@ -309,7 +351,7 @@ struct InnerMostDimPreserver<-1, Self, Op, true> {
// Default full reducer
template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
struct FullReducer {
static const bool HasOptimizedImplementation = false;
static constexpr bool HasOptimizedImplementation = false;
static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::EvaluatorPointerType output) {
const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
@@ -334,8 +376,8 @@ struct FullReducerShard {
// Multithreaded full reducer
template <typename Self, typename Op, bool Vectorizable>
struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful;
static const Index PacketSize =
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful;
static constexpr Index PacketSize =
unpacket_traits<typename Self::PacketReturnType>::size;
// launch one reducer per thread and accumulate the result.
@@ -351,15 +393,14 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
self.m_impl.costPerCoeff(Vectorizable) +
TensorOpCost(0, 0, internal::functor_traits<Op>::Cost, Vectorizable,
PacketSize);
const int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
const Index num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
num_coeffs, cost, device.numThreads());
if (num_threads == 1) {
*output =
InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer);
return;
}
const Index blocksize =
std::floor<Index>(static_cast<float>(num_coeffs) / num_threads);
const Index blocksize = num_coeffs / num_threads;
const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
eigen_assert(num_coeffs >= numblocks * blocksize);
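// Worked example of the integer block split above (illustrative values only):
//   num_coeffs = 1000003, num_threads = 8
//   blocksize  = 1000003 / 8      = 125000
//   numblocks  = 1000003 / 125000 = 8
//   numblocks * blocksize = 1000000, so the assert num_coeffs >= 1000000 holds,
//   and the remaining 3 coefficients fall outside the evenly sized blocks.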
@@ -393,7 +434,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
// Default inner reducer
template <typename Self, typename Op, typename Device>
struct InnerReducer {
static const bool HasOptimizedImplementation = false;
static constexpr bool HasOptimizedImplementation = false;
EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
eigen_assert(false && "Not implemented");
@@ -404,7 +445,7 @@ struct InnerReducer {
// Default outer reducer
template <typename Self, typename Op, typename Device>
struct OuterReducer {
static const bool HasOptimizedImplementation = false;
static constexpr bool HasOptimizedImplementation = false;
EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
eigen_assert(false && "Not implemented");
@@ -416,7 +457,7 @@ struct OuterReducer {
// Default Generic reducer
template <typename Self, typename Op, typename Device>
struct GenericReducer {
static const bool HasOptimizedImplementation = false;
static constexpr bool HasOptimizedImplementation = false;
EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
eigen_assert(false && "Not implemented");
@@ -458,9 +499,9 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(R, const S, I_
template <typename Op, typename CoeffReturnType>
struct ReductionReturnType {
#if defined(EIGEN_USE_SYCL)
typedef typename remove_const<decltype(std::declval<Op>().initialize())>::type type;
typedef std::remove_const_t<decltype(std::declval<Op>().initialize())> type;
#else
typedef typename remove_const<CoeffReturnType>::type type;
typedef std::remove_const_t<CoeffReturnType> type;
#endif
};
@@ -472,7 +513,7 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType,
public:
typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested;
typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index;
@@ -510,44 +551,56 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
typedef typename XprType::Index Index;
typedef ArgType ChildType;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
static const int NumInputDims = internal::array_size<InputDimensions>::value;
static const int NumReducedDims = internal::array_size<Dims>::value;
static const int NumOutputDims = NumInputDims - NumReducedDims;
typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions;
static constexpr int NumInputDims = internal::array_size<InputDimensions>::value;
static constexpr int NumReducedDims = internal::array_size<Dims>::value;
static constexpr int NumOutputDims = NumInputDims - NumReducedDims;
typedef std::conditional_t<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> > Dimensions;
typedef typename XprType::Scalar Scalar;
typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self;
static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
static constexpr bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
typedef typename internal::ReductionReturnType<Op, typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const Index PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr Index PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
// Subset of strides of the input tensor for the non-reduced dimensions.
// Subset of strides of the input tensor for the non-reduced dimensions.
// Indexed by output dimensions.
static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
static constexpr int NumPreservedStrides = max_n_1<NumOutputDims>::size;
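// Worked example (illustrative, assuming column-major layout): an input of
// dimensions (2, 3, 4) has strides (1, 2, 6); reducing dimension 1 leaves
// output dimensions (2, 4), and the preserved strides, indexed by output
// dimension, are the input strides of the kept dimensions: (1, 6).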
// For full reductions
#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
static constexpr bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
static constexpr bool RunningOnSycl = false;
#elif defined(EIGEN_USE_SYCL)
static constexpr bool RunningOnSycl = internal::is_same<internal::remove_all_t<Device>, Eigen::SyclDevice>::value;
static constexpr bool RunningOnGPU = false;
#else
static constexpr bool RunningOnGPU = false;
static constexpr bool RunningOnSycl = false;
#endif
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess,
BlockAccess = false,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
static const bool RunningFullReduction = (NumOutputDims==0);
static constexpr bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
static constexpr bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
static constexpr bool RunningFullReduction = (NumOutputDims==0);
EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
@@ -578,7 +631,7 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
}
} else {
m_outputStrides[NumOutputDims - 1] = 1;
m_outputStrides[static_cast<size_t>(NumOutputDims - 1)] = 1;
for (int i = NumOutputDims - 2; i >= 0; --i) {
m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
@@ -625,7 +678,7 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
? internal::array_prod(input_dims)
: (static_cast<int>(Layout) == static_cast<int>(ColMajor))
? m_preservedStrides[0]
: m_preservedStrides[NumOutputDims - 1];
: m_preservedStrides[static_cast<size_t>(NumOutputDims - 1)];
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -784,14 +837,13 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
if (RunningOnGPU && m_result) {
return internal::pload<PacketReturnType>(m_result + index);
}
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
if (ReducingInnerMostDims) {
const Index num_values_to_reduce =
(static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
@@ -950,17 +1002,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
// Operation to apply for computing the reduction.
Op m_reducer;
// For full reductions
#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
static const bool RunningOnSycl = false;
#elif defined(EIGEN_USE_SYCL)
static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value;
static const bool RunningOnGPU = false;
#else
static const bool RunningOnGPU = false;
static const bool RunningOnSycl = false;
#endif
EvaluatorPointerType m_result;
const Device EIGEN_DEVICE_REF m_device;

View File

@@ -1,6 +0,0 @@
#if defined(__clang__) || defined(__GNUC__)
#warning "Deprecated header file, please either include the main Eigen/CXX11/Tensor header or the respective TensorReductionGpu.h file"
#endif
#include "TensorReductionGpu.h"

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -98,6 +100,7 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
}
}
}
#ifdef EIGEN_GPU_COMPILE_PHASE
// the reduction should be associative, since the wide-vector reduction is not atomic as a whole but is performed as atomic half2 operations
template <typename R>
__device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
@@ -107,6 +110,7 @@ __device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reduc
atomicReduce(houtput+i,*(haccum+i),reducer);
}
}
#endif // EIGEN_GPU_COMPILE_PHASE
#endif // EIGEN_HAS_GPU_FP16
template <>
@@ -213,8 +217,8 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer
#ifdef EIGEN_HAS_GPU_FP16
template <typename Self,
typename Reducer, typename Index>
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
packet_traits<Eigen::half>::type* scratch) {
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(
Reducer reducer, const Self input, Index num_coeffs, half* scratch) {
eigen_assert(blockDim.x == 1);
eigen_assert(gridDim.x == 1);
typedef packet_traits<Eigen::half>::type packet_type;
@@ -224,15 +228,16 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFlo
half2* h2scratch = reinterpret_cast<half2*>(scratch);
for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) {
*h2scratch =
__halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1));
__halves2half2(input.coeff(i), input.coeff(i + 1));
h2scratch++;
}
if ((num_coeffs & 1) != 0) {
half lastCoeff = input.m_impl.coeff(num_coeffs - 1);
half lastCoeff = input.coeff(num_coeffs - 1);
*h2scratch = __halves2half2(lastCoeff, reducer.initialize());
}
} else {
*scratch = reducer.template initializePacket<packet_type>();
packet_type reduce = reducer.template initializePacket<packet_type>();
internal::pstoreu(scratch, reduce);
}
}
@@ -258,8 +263,9 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reduce
template <int BlockSize, int NumPerThread, typename Self,
typename Reducer, typename Index>
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
half* output, packet_traits<Eigen::half>::type* scratch) {
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(
Reducer reducer, const Self input, Index num_coeffs,
half* output, half* scratch) {
typedef typename packet_traits<Eigen::half>::type PacketType;
const int packet_width = unpacket_traits<PacketType>::size;
eigen_assert(NumPerThread % packet_width == 0);
@@ -273,19 +279,20 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
int rem = num_coeffs % packet_width;
if (rem != 0) {
half2* p_scratch = reinterpret_cast<half2*>(scratch);
*scratch = reducer.template initializePacket<PacketType>();
pstoreu(scratch, reducer.template initializePacket<PacketType>());
for (int i = 0; i < rem / 2; i++) {
*p_scratch = __halves2half2(
input.m_impl.coeff(num_coeffs - packet_width + 2 * i),
input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1));
input.coeff(num_coeffs - packet_width + 2 * i),
input.coeff(num_coeffs - packet_width + 2 * i + 1));
p_scratch++;
}
if ((num_coeffs & 1) != 0) {
half last = input.m_impl.coeff(num_coeffs - 1);
half last = input.coeff(num_coeffs - 1);
*p_scratch = __halves2half2(last, reducer.initialize());
}
} else {
*scratch = reducer.template initializePacket<PacketType>();
PacketType reduce = reducer.template initializePacket<PacketType>();
pstoreu(scratch, reduce);
}
}
__syncthreads();
@@ -298,7 +305,7 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
for (Index i = 0; i < max_iter; i += BlockSize) {
const Index index = first_index + packet_width * i;
eigen_assert(index + packet_width < num_coeffs);
PacketType val = input.m_impl.template packet<Unaligned>(index);
PacketType val = input.template packet<Unaligned>(index);
reducer.reducePacket(val, &accum);
}
@@ -337,7 +344,7 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
}
if ((threadIdx.x & (warpSize - 1)) == 0) {
atomicReduce(scratch, accum, reducer);
atomicReduce(reinterpret_cast<PacketType*>(scratch), accum, reducer);
}
__syncthreads();
@@ -357,17 +364,21 @@ __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reduce
}
template <typename Op>
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits<Eigen::half>::type* scratch) {
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, half* scratch) {
eigen_assert(threadIdx.x == 1);
half2* pscratch = reinterpret_cast<half2*>(scratch);
half tmp = __float2half(0.f);
typedef packet_traits<Eigen::half>::type packet_type;
for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
reducer.reduce(__low2half(*pscratch), &tmp);
reducer.reduce(__high2half(*pscratch), &tmp);
pscratch++;
if (unpacket_traits<packet_type>::size == 1) {
*output = *scratch;
} else {
half2* pscratch = reinterpret_cast<half2*>(scratch);
half tmp = __float2half(0.f);
for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
reducer.reduce(__low2half(*pscratch), &tmp);
reducer.reduce(__high2half(*pscratch), &tmp);
pscratch++;
}
*output = tmp;
}
*output = tmp;
}
#endif // EIGEN_HAS_GPU_FP16
@@ -383,10 +394,10 @@ struct FullReductionLauncher {
template <typename Self, typename Op, typename OutputType, bool PacketAccess>
struct FullReductionLauncher<
Self, Op, OutputType, PacketAccess,
typename internal::enable_if<
std::enable_if_t<
internal::is_same<float, OutputType>::value ||
internal::is_same<double, OutputType>::value,
void>::type> {
void>> {
static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
typedef typename Self::Index Index;
@@ -416,13 +427,11 @@ template <typename Self, typename Op>
struct FullReductionLauncher<Self, Op, Eigen::half, true> {
static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
typedef typename Self::Index Index;
typedef typename packet_traits<Eigen::half>::type PacketType;
const int block_size = 256;
const int num_per_thread = 128;
const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
PacketType* scratch = static_cast<PacketType*>(device.scratchpad());
// half2* scratch = static_cast<half2*>(device.scratchpad());
half* scratch = static_cast<half*>(device.scratchpad());
if (num_blocks > 1) {
// We initialize the output and the scratchpad outside the reduction kernel when we can't be sure that there
@@ -449,12 +458,12 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
// so reduce the scope of the optimized version of the code to the simple cases
// of doubles, floats and half floats
#ifdef EIGEN_HAS_GPU_FP16
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
internal::is_same<typename Self::CoeffReturnType, double>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else // EIGEN_HAS_GPU_FP16
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
internal::is_same<typename Self::CoeffReturnType, double>::value);
#endif // EIGEN_HAS_GPU_FP16
@@ -755,10 +764,10 @@ struct InnerReductionLauncher {
template <typename Self, typename Op, typename OutputType, bool PacketAccess>
struct InnerReductionLauncher<
Self, Op, OutputType, PacketAccess,
typename internal::enable_if<
std::enable_if_t<
internal::is_same<float, OutputType>::value ||
internal::is_same<double, OutputType>::value,
void>::type> {
void>> {
static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
typedef typename Self::Index Index;
@@ -838,12 +847,12 @@ struct InnerReducer<Self, Op, GpuDevice> {
// so reduce the scope of the optimized version of the code to the simple case
// of floats and half floats.
#ifdef EIGEN_HAS_GPU_FP16
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
internal::is_same<typename Self::CoeffReturnType, double>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else // EIGEN_HAS_GPU_FP16
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
internal::is_same<typename Self::CoeffReturnType, double>::value);
#endif // EIGEN_HAS_GPU_FP16
@@ -900,7 +909,7 @@ struct OuterReducer<Self, Op, GpuDevice> {
// Unfortunately nvidia doesn't support exotic types such as complex well,
// so reduce the scope of the optimized version of the code to the simple case
// of floats.
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
static constexpr bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
internal::is_same<typename Self::CoeffReturnType, double>::value);
template <typename Device, typename OutputType>

View File

@@ -27,6 +27,8 @@
#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace TensorSycl {
namespace internal {
@@ -125,9 +127,8 @@ class FullReductionKernelFunctor {
typedef typename OpDef::type Op;
typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
typedef typename Evaluator::PacketReturnType PacketReturnType;
typedef
typename ::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
PacketReturnType, CoeffReturnType>::type OutType;
typedef std::conditional_t<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
PacketReturnType, CoeffReturnType> OutType;
typedef cl::sycl::accessor<OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
LocalAccessor;
LocalAccessor scratch;
@@ -143,7 +144,7 @@ class FullReductionKernelFunctor {
void operator()(cl::sycl::nd_item<1> itemID) { compute_reduction(itemID); }
template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<Vect>::type compute_reduction(
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<Vect> compute_reduction(
const cl::sycl::nd_item<1> &itemID) {
auto output_ptr = final_output.get_pointer();
Index VectorizedRange = (rng / Evaluator::PacketSize) * Evaluator::PacketSize;
@@ -182,7 +183,7 @@ class FullReductionKernelFunctor {
}
template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!Vect>::type compute_reduction(
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!Vect> compute_reduction(
const cl::sycl::nd_item<1> &itemID) {
auto output_ptr = final_output.get_pointer();
Index globalid = itemID.get_global_id(0);
@@ -481,7 +482,7 @@ struct FullReducer<Self, Op, Eigen::SyclDevice, Vectorizable> {
static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
static EIGEN_CONSTEXPR int PacketSize = Self::PacketAccess ? Self::PacketSize : 1;
static void run(const Self &self, Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType data) {
typedef typename conditional<Self::PacketAccess, typename Self::PacketReturnType, CoeffReturnType>::type OutType;
typedef std::conditional_t<Self::PacketAccess, typename Self::PacketReturnType, CoeffReturnType> OutType;
static_assert(!((EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1) &
(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 - 1)),
"The Local thread size must be a power of 2 for the reduction "

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H
#define EIGEN_CXX11_TENSOR_TENSOR_REF_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -96,13 +98,13 @@ class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimension
};
template <typename Dimensions, typename Expr, typename Device>
class TensorLazyEvaluator : public internal::conditional<bool(internal::is_lvalue<Expr>::value),
class TensorLazyEvaluator : public std::conditional_t<bool(internal::is_lvalue<Expr>::value),
TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type {
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > {
public:
typedef typename internal::conditional<bool(internal::is_lvalue<Expr>::value),
TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type Base;
typedef std::conditional_t<bool(internal::is_lvalue<Expr>::value),
TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > Base;
typedef typename Base::Scalar Scalar;
TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) {
@@ -135,15 +137,15 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
typedef Scalar* PointerType;
typedef PointerType PointerArgType;
static const Index NumIndices = PlainObjectType::NumIndices;
static constexpr Index NumIndices = PlainObjectType::NumIndices;
typedef typename PlainObjectType::Dimensions Dimensions;
static constexpr int Layout = PlainObjectType::Layout;
enum {
IsAligned = false,
PacketAccess = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = PlainObjectType::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -172,7 +174,7 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
unrefEvaluator();
}
TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) {
TensorRef(const TensorRef& other) : TensorBase<TensorRef<PlainObjectType> >(other), m_evaluator(other.m_evaluator) {
eigen_assert(m_evaluator->refCount() > 0);
m_evaluator->incrRefCount();
}
@@ -204,7 +206,6 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
return m_evaluator->coeff(index);
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const
{
@@ -219,85 +220,6 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
return coeffRef(indices);
}
#else
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const
{
array<Index, 2> indices;
indices[0] = i0;
indices[1] = i1;
return coeff(indices);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const
{
array<Index, 3> indices;
indices[0] = i0;
indices[1] = i1;
indices[2] = i2;
return coeff(indices);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const
{
array<Index, 4> indices;
indices[0] = i0;
indices[1] = i1;
indices[2] = i2;
indices[3] = i3;
return coeff(indices);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
{
array<Index, 5> indices;
indices[0] = i0;
indices[1] = i1;
indices[2] = i2;
indices[3] = i3;
indices[4] = i4;
return coeff(indices);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1)
{
array<Index, 2> indices;
indices[0] = i0;
indices[1] = i1;
return coeffRef(indices);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2)
{
array<Index, 3> indices;
indices[0] = i0;
indices[1] = i1;
indices[2] = i2;
return coeffRef(indices);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
{
array<Index, 4> indices;
indices[0] = i0;
indices[1] = i1;
indices[2] = i2;
indices[3] = i3;
return coeffRef(indices);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4)
{
array<Index, 5> indices;
indices[0] = i0;
indices[1] = i1;
indices[2] = i2;
indices[3] = i3;
indices[4] = i4;
return coeffRef(indices);
}
#endif
template <std::size_t NumIndices> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const
@@ -374,12 +296,12 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorRef<Derived>::Layout;
enum {
IsAligned = false,
PacketAccess = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorRef<Derived>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorReverse
@@ -28,9 +30,9 @@ struct traits<TensorReverseOp<ReverseDimensions,
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -71,7 +73,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
const ReverseDimensions& reverse() const { return m_reverse_dims; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)
@@ -88,21 +90,21 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
{
typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<ReverseDimensions>::value;
static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = NumDims > 0,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -213,12 +215,11 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// TODO(ndjaitly): write a better packing routine that uses
// local structure.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType>
values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
@@ -413,15 +414,15 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
Device> Base;
typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<ReverseDimensions>::value;
static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -431,7 +432,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlock;
@@ -446,7 +447,6 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x) {
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// This code is pilfered from TensorMorphing.h

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -21,9 +23,9 @@ struct traits<TensorScanOp<Op, XprType> >
typedef traits<XprType> XprTraits;
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -379,21 +381,21 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
typedef typename XprType::Index Index;
typedef const ArgType ChildTypeNoConst;
typedef const ArgType ChildType;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = true
};

View File

@@ -25,7 +25,7 @@
* buffer is given as an input and all the threads within a work-group scan and
* reduce the boundaries between the blocks (generated from the previous
* kernel) and write the data to the temporary buffer. If the second kernel is
* required, the third and final kerenl (ScanAdjustmentKernelFunctor) will
* required, the third and final kernel (ScanAdjustmentKernelFunctor) will
* adjust the final result into the output buffer.
* The original algorithm for the parallel prefix sum can be found here:
*
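The three-phase structure described above (per-block scan, scan of the block sums, then a final adjustment) can be sketched serially as follows; the block size and the use of addition as the scan operator are assumptions for illustration:
#include <algorithm>
#include <cstddef>
#include <vector>
// Serial sketch of the blocked scan that the SYCL kernels perform in parallel.
inline void blocked_inclusive_scan_sketch(std::vector<float>& data, std::size_t block = 256) {
  const std::size_t n = data.size();
  std::vector<float> block_sums((n + block - 1) / block, 0.0f);
  // Phase 1: scan each block independently and record its total.
  for (std::size_t b = 0; b * block < n; ++b) {
    float acc = 0.0f;
    for (std::size_t i = b * block; i < std::min(n, (b + 1) * block); ++i) {
      acc += data[i];
      data[i] = acc;
    }
    block_sums[b] = acc;
  }
  // Phase 2: exclusive scan of the per-block totals.
  float running = 0.0f;
  for (std::size_t b = 0; b < block_sums.size(); ++b) {
    const float total = block_sums[b];
    block_sums[b] = running;
    running += total;
  }
  // Phase 3: adjust every element by the sum of all preceding blocks.
  for (std::size_t b = 0; b * block < n; ++b) {
    for (std::size_t i = b * block; i < std::min(n, (b + 1) * block); ++i) {
      data[i] += block_sums[b];
    }
  }
}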
@@ -37,6 +37,8 @@
#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace TensorSycl {
namespace internal {
@@ -105,27 +107,27 @@ struct ScanKernelFunctor {
inclusive(inclusive_) {}
template <scan_step sst = stp, typename Input>
typename ::Eigen::internal::enable_if<sst == scan_step::first, CoeffReturnType>::type EIGEN_DEVICE_FUNC
std::enable_if_t<sst == scan_step::first, CoeffReturnType> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
read(const Input &inpt, Index global_id) {
return inpt.coeff(global_id);
}
template <scan_step sst = stp, typename Input>
typename ::Eigen::internal::enable_if<sst != scan_step::first, CoeffReturnType>::type EIGEN_DEVICE_FUNC
std::enable_if_t<sst != scan_step::first, CoeffReturnType> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
read(const Input &inpt, Index global_id) {
return inpt[global_id];
}
template <scan_step sst = stp, typename InclusiveOp>
typename ::Eigen::internal::enable_if<sst == scan_step::first>::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
std::enable_if_t<sst == scan_step::first> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
first_step_inclusive_Operation(InclusiveOp inclusive_op) {
inclusive_op();
}
template <scan_step sst = stp, typename InclusiveOp>
typename ::Eigen::internal::enable_if<sst != scan_step::first>::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
std::enable_if_t<sst != scan_step::first> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
first_step_inclusive_Operation(InclusiveOp) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorShuffling
@@ -28,9 +30,9 @@ struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -69,7 +71,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
const Shuffle& shufflePermutation() const { return m_shuffle; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorShufflingOp)
@@ -88,26 +90,26 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Self;
typedef TensorShufflingOp<Shuffle, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -194,7 +196,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
struct PacketLoader {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
static PacketReturnType Run(const Self& self, Index index) {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = self.coeff(index + i);
@@ -211,7 +213,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
if (self.m_is_identity) {
return self.m_impl.template packet<LoadMode>(index);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = self.coeff(index + i);
@@ -225,8 +227,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
}
@@ -255,7 +256,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool root_of_expr_ast = false) const {
assert(m_impl.data() != NULL);
eigen_assert(m_impl.data() != NULL);
typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
TensorBlockIO;
@@ -363,23 +364,23 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
typedef TensorShufflingOp<Shuffle, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef std::remove_const_t<Scalar> ScalarNoConst;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -397,9 +398,7 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
template <int StoreMode> EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {

View File

@@ -17,6 +17,8 @@
#define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
#endif
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \internal
@@ -39,10 +41,10 @@ template<typename T, typename FixedDimensions, int Options_>
class TensorStorage
{
private:
static const std::size_t Size = FixedDimensions::total_size;
static constexpr std::size_t Size = FixedDimensions::total_size;
// Allocate an array of size at least one to prevent compiler warnings.
static const std::size_t MinSize = max_n_1<Size>::size;
static constexpr std::size_t MinSize = max_n_1<Size>::size;
EIGEN_ALIGN_MAX T m_data[MinSize];
public:
@@ -55,17 +57,14 @@ class TensorStorage
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T *data() const { return m_data; }
static EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const FixedDimensions& dimensions()
{
static const FixedDimensions* singleton_dimensions = new FixedDimensions();
return *singleton_dimensions;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const FixedDimensions dimensions() const { return FixedDimensions(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE DenseIndex size() const { return Size; }
};
// pure dynamic
template<typename T, typename IndexType, int NumIndices_, int Options_>
class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
@@ -86,12 +85,10 @@ class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions)
{ EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN }
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex>
EIGEN_DEVICE_FUNC TensorStorage(DenseIndex... indices) : m_dimensions(indices...) {
m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(m_dimensions));
}
#endif
EIGEN_DEVICE_FUNC TensorStorage(const Self& other)
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions)))
@@ -108,7 +105,6 @@ class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
return *this;
}
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC TensorStorage(Self&& other) : TensorStorage()
{
*this = std::move(other);
@@ -120,7 +116,6 @@ class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
numext::swap(m_dimensions, other.m_dimensions);
return *this;
}
#endif
EIGEN_DEVICE_FUNC ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
EIGEN_DEVICE_FUNC void swap(Self& other)

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorStriding
@@ -28,9 +30,9 @@ struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -69,7 +71,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
const Strides& strides() const { return m_dims; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)
@@ -86,21 +88,21 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
typedef TensorStridingOp<Strides, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -193,7 +195,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
return rslt;
}
else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
values[0] = m_impl.coeff(inputIndices[0]);
values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
EIGEN_UNROLL_LOOP
@@ -265,14 +267,14 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
typedef TensorStridingOp<Strides, ArgType> XprType;
typedef TensorEvaluator<const XprType, Device> Base;
// typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
// typedef DSizes<Index, NumDims> Dimensions;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -284,7 +286,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
{

View File

@@ -11,6 +11,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRACE_H
#define EIGEN_CXX11_TENSOR_TENSOR_TRACE_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorTrace
@@ -30,9 +32,9 @@ struct traits<TensorTraceOp<Dims, XprType> > : public traits<XprType>
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
static constexpr int Layout = XprTraits::Layout;
};
template<typename Dims, typename XprType>
@@ -69,7 +71,7 @@ class TensorTraceOp : public TensorBase<TensorTraceOp<Dims, XprType> >
const Dims& dims() const { return m_dims; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename internal::remove_all<typename XprType::Nested>::type& expression() const { return m_xpr; }
const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }
protected:
typename XprType::Nested m_xpr;
@@ -82,24 +84,24 @@ template<typename Dims, typename ArgType, typename Device>
struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
{
typedef TensorTraceOp<Dims, ArgType> XprType;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static const int NumReducedDims = internal::array_size<Dims>::value;
static const int NumOutputDims = NumInputDims - NumReducedDims;
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumReducedDims = internal::array_size<Dims>::value;
static constexpr int NumOutputDims = NumInputDims - NumReducedDims;
typedef typename XprType::Index Index;
typedef DSizes<Index, NumOutputDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};
@@ -134,6 +136,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
}
}
EIGEN_ONLY_USED_FOR_DEBUG(num_distinct_reduce_dims);
eigen_assert(num_distinct_reduce_dims == NumReducedDims);
// Compute the dimensions of the result.
@@ -243,11 +246,9 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index + i);
}

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
#define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -50,8 +52,8 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
typedef Scalar_ Scalar;
typedef Dense StorageKind;
typedef IndexType_ Index;
static const int NumDimensions = NumIndices_;
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
static constexpr int NumDimensions = NumIndices_;
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit)
@@ -69,8 +71,8 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
typedef Scalar_ Scalar;
typedef Dense StorageKind;
typedef IndexType_ Index;
static const int NumDimensions = array_size<Dimensions>::value;
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
static constexpr int NumDimensions = array_size<Dimensions>::value;
static constexpr int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit)
@@ -90,8 +92,8 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
typedef typename BaseTraits::Scalar Scalar;
typedef typename BaseTraits::StorageKind StorageKind;
typedef typename BaseTraits::Index Index;
static const int NumDimensions = BaseTraits::NumDimensions;
static const int Layout = BaseTraits::Layout;
static constexpr int NumDimensions = BaseTraits::NumDimensions;
static constexpr int Layout = BaseTraits::Layout;
enum {
Options = Options_,
Flags = BaseTraits::Flags
@@ -112,8 +114,8 @@ struct traits<TensorRef<PlainObjectType> >
typedef typename BaseTraits::Scalar Scalar;
typedef typename BaseTraits::StorageKind StorageKind;
typedef typename BaseTraits::Index Index;
static const int NumDimensions = BaseTraits::NumDimensions;
static const int Layout = BaseTraits::Layout;
static constexpr int NumDimensions = BaseTraits::NumDimensions;
static constexpr int Layout = BaseTraits::Layout;
enum {
Options = BaseTraits::Options,
Flags = BaseTraits::Flags
@@ -122,16 +124,16 @@ struct traits<TensorRef<PlainObjectType> >
};
template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
template<typename Scalar_, int NumIndices_, int Options, typename IndexType_>
struct eval<Tensor<Scalar_, NumIndices_, Options, IndexType_>, Eigen::Dense>
{
typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
typedef const Tensor<Scalar_, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
};
template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
template<typename Scalar_, int NumIndices_, int Options, typename IndexType_>
struct eval<const Tensor<Scalar_, NumIndices_, Options, IndexType_>, Eigen::Dense>
{
typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
typedef const Tensor<Scalar_, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
};
template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
@@ -254,10 +256,10 @@ struct nested<const TensorRef<PlainObjectType> >
// the SAME case.
// When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0,
// Pc=0.
typedef enum {
enum PaddingType {
PADDING_VALID = 1,
PADDING_SAME = 2
} PaddingType;
};
} // end namespace Eigen
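For context, the two padding values map to the usual convolution-style output sizes. The helper below is an illustrative sketch only (not part of Eigen) of the standard formulas, which reduce to R' = R - K + 1 for PADDING_VALID and R' = R for PADDING_SAME when the stride is 1, matching the comment above.

    // Illustrative only: typical output-row count for input rows R, kernel rows K, stride s.
    // PADDING_VALID: R' = ceil((R - K + 1) / s)   (== R - K + 1 when s == 1)
    // PADDING_SAME : R' = ceil(R / s)             (== R         when s == 1)
    inline int outputRows(int R, int K, int s, bool same_padding) {
      return same_padding ? (R + s - 1) / s
                          : (R - K + s) / s;  // integer ceiling of (R - K + 1) / s
    }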

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -55,7 +57,7 @@ struct TensorUInt128
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
explicit TensorUInt128(const T& x) : high(0), low(x) {
eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
eigen_assert((static_cast<std::conditional_t<sizeof(T) == 8, uint64_t, uint32_t>>(x) <= NumTraits<uint64_t>::highest()));
eigen_assert(x >= 0);
}
@@ -78,14 +80,14 @@ template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
return (lhs.high == rhs.high) & (lhs.low == rhs.low);
return (lhs.high == rhs.high) && (lhs.low == rhs.low);
}
template <typename HL, typename LL, typename HR, typename LR>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
return (lhs.high != rhs.high) | (lhs.low != rhs.low);
return (lhs.high != rhs.high) || (lhs.low != rhs.low);
}
template <typename HL, typename LL, typename HR, typename LR>

View File

@@ -4,6 +4,8 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
/** \class TensorVolumePatch
@@ -26,14 +28,14 @@ namespace internal {
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType>
{
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef traits<XprType> XprTraits;
typedef typename XprTraits::StorageKind StorageKind;
typedef typename XprTraits::Index Index;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions + 1;
static const int Layout = XprTraits::Layout;
typedef std::remove_reference_t<Nested> Nested_;
static constexpr int NumDimensions = XprTraits::NumDimensions + 1;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
};
@@ -135,7 +137,7 @@ class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows,
Scalar padding_value() const { return m_padding_value; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
const internal::remove_all_t<typename XprType::Nested>&
expression() const { return m_xpr; }
protected:
@@ -170,22 +172,22 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
{
typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static const int NumDims = NumInputDims + 1;
static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static constexpr int NumDims = NumInputDims + 1;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef std::remove_const_t<typename XprType::Scalar> Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};
@@ -419,7 +421,6 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||
@@ -543,7 +544,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i);

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
#define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
class DynamicSGroup

View File

@@ -0,0 +1,3 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE_H
#error "Please include unsupported/Eigen/CXX11/TensorSymmetry instead of including headers inside the src directory directly."
#endif

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
#define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
#define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
enum {
@@ -237,11 +239,11 @@ struct tensor_symmetry_pre_analysis<NumIndices, Gen_, Gens_...>
typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper;
constexpr static std::size_t possible_size = helper::size;
typedef typename conditional<
typedef std::conditional_t<
possible_size == 0 || possible_size >= max_static_elements,
DynamicSGroupFromTemplateArgs<Gen_, Gens_...>,
typename helper::type
>::type root_type;
> root_type;
};
template<bool instantiate, std::size_t NumIndices, typename... Gens>

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
#define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
#include "../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
@@ -126,11 +128,11 @@ template<
>
struct strip_identities<Equality, id, type_list<t, ts...>>
{
typedef typename conditional<
typedef std::conditional_t<
Equality<id, t>::value,
typename strip_identities<Equality, id, type_list<ts...>>::type,
typename concat<type_list<t>, typename strip_identities<Equality, id, type_list<ts...>>::type>::type
>::type type;
> type;
constexpr static int global_flags = Equality<id, t>::global_flags | strip_identities<Equality, id, type_list<ts...>>::global_flags;
};
@@ -637,21 +639,21 @@ struct enumerate_group_elements_noid<Multiply, Equality, id, type_list<>, initia
* \tparam Equality The equality check operation that checks if two group elements
* are equal to another.
* \tparam id The identity element
* \tparam _generators A list of (possibly redundant) generators of the group
* \tparam Generators_ A list of (possibly redundant) generators of the group
*/
template<
template<typename, typename> class Multiply,
template<typename, typename> class Equality,
typename id,
typename _generators
typename Generators_
>
struct enumerate_group_elements
: public enumerate_group_elements_noid<
Multiply,
Equality,
id,
typename strip_identities<Equality, id, _generators>::type,
strip_identities<Equality, id, _generators>::global_flags
typename strip_identities<Equality, id, Generators_>::type,
strip_identities<Equality, id, Generators_>::global_flags
>
{
};

View File

@@ -13,6 +13,8 @@
#ifndef EIGEN_CXX11_THREADPOOL_BARRIER_H
#define EIGEN_CXX11_THREADPOOL_BARRIER_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
class Barrier {

View File

@@ -7,8 +7,10 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
#define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
#ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H
#define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
@@ -85,7 +87,7 @@ class EventCount {
CheckState(state, true);
uint64_t newstate;
if ((state & kSignalMask) != 0) {
// Consume the signal and return immidiately.
// Consume the signal and return immediately.
newstate = state - kWaiterInc - kSignalInc;
} else {
// Remove this thread from pre-wait counter and add to the waiter stack.
@@ -112,7 +114,7 @@ class EventCount {
CheckState(state, true);
uint64_t newstate = state - kWaiterInc;
// We don't know if the thread was also notified or not,
// so we should not consume a signal unconditionaly.
// so we should not consume a signal unconditionally.
// Only if number of waiters is equal to number of signals,
// we know that the thread was notified and we must take away the signal.
if (((state & kWaiterMask) >> kWaiterShift) ==
@@ -246,4 +248,4 @@ class EventCount {
} // namespace Eigen
#endif // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
#endif // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H

View File

@@ -0,0 +1,3 @@
#ifndef EIGEN_CXX11_THREADPOOL_MODULE_H
#error "Please include unsupported/Eigen/CXX11/ThreadPool instead of including headers inside the src directory directly."
#endif

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
#define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
template <typename Environment>

View File

@@ -7,8 +7,10 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H
#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
@@ -233,4 +235,4 @@ class RunQueue {
} // namespace Eigen
#endif // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
#endif // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H
#define EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
struct StlThreadEnvironment {

View File

@@ -18,10 +18,7 @@
#else
#if EIGEN_MAX_CPP_VER >= 11 && \
((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \
__has_feature(cxx_thread_local) || \
(EIGEN_COMP_MSVC >= 1900) )
#if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC )
#define EIGEN_THREAD_LOCAL static thread_local
#endif
@@ -62,6 +59,8 @@
#endif // EIGEN_AVOID_THREAD_LOCAL
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {

View File

@@ -10,6 +10,8 @@
#ifndef EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H
#define EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H
#include "./InternalHeaderCheck.h"
namespace Eigen {
// This defines an interface that ThreadPoolDevice can take to use

View File

@@ -11,10 +11,6 @@
#define EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H
// Try to come up with a portable way to yield
#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7)
#define EIGEN_THREAD_YIELD() sched_yield()
#else
#define EIGEN_THREAD_YIELD() std::this_thread::yield()
#endif
#endif // EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H

View File

@@ -35,7 +35,8 @@ template<typename T, T... nn>
struct numeric_list { constexpr static std::size_t count = sizeof...(nn); };
template<typename T, T n, T... nn>
struct numeric_list<T, n, nn...> { static const std::size_t count = sizeof...(nn) + 1; const static T first_value = n; };
struct numeric_list<T, n, nn...> { static constexpr std::size_t count = sizeof...(nn) + 1;
static constexpr T first_value = n; };
#ifndef EIGEN_PARSED_BY_DOXYGEN
/* numeric list constructors
@@ -81,7 +82,8 @@ template<typename a, typename... as> struct take<0, type_list<a, as...>>
template<> struct take<0, type_list<>> { typedef type_list<> type; };
template<typename T, int n, T a, T... as> struct take<n, numeric_list<T, a, as...>> : concat<numeric_list<T, a>, typename take<n-1, numeric_list<T, as...>>::type> {};
template<typename T, int n> struct take<n, numeric_list<T>> { typedef numeric_list<T> type; };
// XXX The following breaks in gcc-11, and is invalid anyways.
// template<typename T, int n> struct take<n, numeric_list<T>> { typedef numeric_list<T> type; };
template<typename T, T a, T... as> struct take<0, numeric_list<T, a, as...>> { typedef numeric_list<T> type; };
template<typename T> struct take<0, numeric_list<T>> { typedef numeric_list<T> type; };

View File

@@ -27,18 +27,6 @@
#error GNU C++ Compiler (g++) only supports required C++ features since version 4.6.
#endif
/* Check that the compiler at least claims to support C++11. It might not be sufficient
* because the compiler may not implement it correctly, but at least we'll know.
* On the other hand, visual studio still doesn't claim to support C++11 although it's
* compliant enugh for our purpose.
*/
#if (EIGEN_COMP_CXXVER < 11)
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#pragma GCC diagnostic error "-Wfatal-errors"
#endif
#error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.)
#endif
namespace Eigen {
namespace internal {

View File

@@ -10,16 +10,43 @@
#ifndef EIGEN_EMULATE_ARRAY_H
#define EIGEN_EMULATE_ARRAY_H
// The array class is only available starting with cxx11. Emulate our own here
// if needed. Beware, msvc still doesn't advertise itself as a c++11 compiler!
// Moreover, CUDA doesn't support the STL containers, so we use our own instead.
#if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(EIGEN_GPUCC) || defined(EIGEN_AVOID_STL_ARRAY)
// CUDA doesn't support the STL containers, so we use our own instead.
#if defined(EIGEN_GPUCC) || defined(EIGEN_AVOID_STL_ARRAY)
namespace Eigen {
template <typename T, size_t n> class array {
public:
typedef T value_type;
typedef T* iterator;
typedef const T* const_iterator;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE iterator begin() { return values; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const_iterator begin() const { return values; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE iterator end() { return values + n; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const_iterator end() const { return values + n; }
#if !defined(EIGEN_GPUCC)
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end());}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
#endif
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& operator[] (size_t index) { eigen_internal_assert(index < size()); return values[index]; }
EIGEN_DEVICE_FUNC
@@ -40,6 +67,7 @@ template <typename T, size_t n> class array {
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
static std::size_t size() { return n; }
@@ -122,13 +150,11 @@ template <typename T, size_t n> class array {
values[7] = v8;
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE array(std::initializer_list<T> l) {
eigen_assert(l.size() == n);
internal::smart_copy(l.begin(), l.end(), values);
}
#endif
};
@@ -172,12 +198,10 @@ template <typename T> class array<T, 0> {
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE array() : dummy() { }
#if EIGEN_HAS_VARIADIC_TEMPLATES
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() {
EIGEN_UNUSED_VARIABLE(l);
eigen_assert(l.size() == 0);
}
#endif
private:
T dummy;
@@ -226,6 +250,7 @@ template<class T, std::size_t N> struct array_size<const array<T,N>& > {
// The compiler supports c++11, and we're not targeting cuda: use std::array as Eigen::array
#include <array>
namespace Eigen {
template <typename T, std::size_t N> using array = std::array<T, N>;
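In either configuration, Eigen::array is intended as a drop-in fixed-size array. A minimal usage sketch, assuming the unsupported Tensor header (which pulls in this file); the values are placeholders:

    #include <unsupported/Eigen/CXX11/Tensor>  // defines Eigen::array
    #include <iostream>

    int main() {
      // std::array on ordinary host builds; the emulated class above is used for
      // GPU builds or when EIGEN_AVOID_STL_ARRAY is defined.
      Eigen::array<int, 3> dims{2, 3, 4};
      int total = 1;
      for (int d : dims) total *= d;  // begin()/end() are available in both implementations
      std::cout << total << "\n";     // 24
    }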

View File

@@ -29,7 +29,7 @@ namespace Eigen {
*/
template <typename T>
class MaxSizeVector {
static const size_t alignment = EIGEN_PLAIN_ENUM_MAX(EIGEN_ALIGNOF(T), sizeof(void*));
static const size_t alignment = internal::plain_enum_max(EIGEN_ALIGNOF(T), sizeof(void*));
public:
// Construct a new MaxSizeVector, reserve n elements.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

View File

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_FFT_H
#define EIGEN_FFT_H
#ifndef EIGEN_FFT_MODULE_H
#define EIGEN_FFT_MODULE_H
#include <complex>
#include <vector>
@@ -29,10 +29,19 @@
* The default implementation is based on kissfft. It is a small, free, and
* reasonably efficient default.
*
* There are currently two implementation backend:
* There are currently four implementation backends:
*
* - kissfft (https://github.com/mborgerding/kissfft) : simple and not so fast, BSD-3-Clause.
* It is a mixed-radix Fast Fourier Transform based upon the principle, "Keep It Simple, Stupid."
* Notice that kissfft fails to handle "atypically-sized" inputs (i.e., sizes with large factors); a workaround is to use fftw or pocketfft.
* - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size.
* - MKL (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form.
* - MKL (https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html) : fastest, free -- may be incompatible with Eigen in GPL form.
* - pocketfft (https://gitlab.mpcdf.mpg.de/mtr/pocketfft) : faster than kissfft, BSD 3-clause.
* It is a heavily modified implementation of FFTPack, with the following advantages:
* 1. strictly C++11 compliant
* 2. more accurate twiddle factor computation
* 3. very fast plan generation
* 4. worst-case complexity for transform sizes with large prime factors is N*log(N), because Bluestein's algorithm is used for these cases
*
* \section FFTDesign Design
*
@@ -79,15 +88,21 @@
template <typename T> struct default_fft_impl : public internal::fftw_impl<T> {};
}
#elif defined EIGEN_MKL_DEFAULT
// TODO
// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form
// intel Math Kernel Library: fastest, free -- may be incompatible with Eigen in GPL form
# include "src/FFT/ei_imklfft_impl.h"
namespace Eigen {
template <typename T> struct default_fft_impl : public internal::imklfft_impl {};
template <typename T> struct default_fft_impl : public internal::imklfft::imklfft_impl<T> {};
}
#else
#elif defined EIGEN_POCKETFFT_DEFAULT
// internal::pocketfft_impl: a heavily modified implementation of FFTPack, with many advantages.
# include<pocketfft_hdronly.h>
# include"src/FFT/ei_pocketfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::pocketfft_impl<T> {};
}
#else
// internal::kissfft_impl: small, free, reasonably efficient default, derived from kissfft
//
# include "src/FFT/ei_kissfft_impl.h"
namespace Eigen {
template <typename T>
@@ -195,19 +210,19 @@ class FFT
m_impl.fwd(dst,src,static_cast<int>(nfft));
}
/*
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
inline
void fwd2(Complex * dst, const Complex * src, int n0,int n1)
{
m_impl.fwd2(dst,src,n0,n1);
}
*/
#endif
template <typename _Input>
template <typename Input_>
inline
void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src)
void fwd( std::vector<Complex> & dst, const std::vector<Input_> & src)
{
if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) )
if ( NumTraits<Input_>::IsComplex == 0 && HasFlag(HalfSpectrum) )
dst.resize( (src.size()>>1)+1); // half the bins + Nyquist bin
else
dst.resize(src.size());
@@ -343,19 +358,18 @@ class FFT
}
}
template <typename _Output>
template <typename Output_>
inline
void inv( std::vector<_Output> & dst, const std::vector<Complex> & src,Index nfft=-1)
void inv( std::vector<Output_> & dst, const std::vector<Complex> & src,Index nfft=-1)
{
if (nfft<1)
nfft = ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size();
nfft = ( NumTraits<Output_>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size();
dst.resize( nfft );
inv( &dst[0],&src[0],nfft);
}
/*
// TODO: multi-dimensional FFTs
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
inline
void inv2(Complex * dst, const Complex * src, int n0,int n1)
{
@@ -363,7 +377,8 @@ class FFT
if ( HasFlag( Unscaled ) == false)
scale(dst,1./(n0*n1),n0*n1);
}
*/
#endif
inline
impl_type & impl() {return m_impl;}

View File

@@ -16,15 +16,61 @@
/**
* \defgroup IterativeLinearSolvers_Module Iterative solvers module
* \defgroup IterativeLinearSolvers_Module Iterative Solvers module
* This module aims to provide various iterative linear and non-linear solver algorithms.
* It currently provides:
* - a constrained conjugate gradient
* - a Householder GMRES implementation
* - an IDR(s) implementation
* - a BiCGSTAB(L) implementation
* - a DGMRES implementation
* - a MINRES implementation
* - an IDRSTABL implementation
*
* Choosing the best solver for solving \c A \c x = \c b depends a lot on the preconditioner chosen as well as the properties of \c A. The following flowchart might help you.
* \dot width=50%
* digraph g {
* node [ fontname=Arial, fontsize=11];
* edge [ fontname=Helvetica, fontsize=10 ];
* A1[label="hermitian",shape="box"];
* A2[label="positive definite",shape="box"];
* CG[shape="plaintext"];
* A3[label="ill conditioned",shape="box"];
* A4[label="good preconditioner",shape="box"];
* A5[label="flexible preconditioner",shape="box"];
* A6[label="strongly indefinite",shape="box"];
* A8[label="large imaginary eigenvalue",shape="box"];
* A7[label="large imaginary eigenvalue",shape="box"];
*
* SYMMLQ[shape="plaintext"];
* MINRES[shape="plaintext"];
* GCR[shape="plaintext"];
* GMRES[shape="plaintext"];
* IDRSTABL[shape="plaintext"];
* IDRS[shape="plaintext"];
* BICGSTABL[shape="plaintext"];
* BICGSTAB[shape="plaintext"];
*
* A1 -> A2 [label="yes"];
* A2 -> CG [label="yes"];
* A2 -> A3 [label="no"];
* A3 -> SYMMLQ [label="yes"];
* A3 -> MINRES [label="no"];
*
* A1 -> A4 [label="no"];
* A4 -> A5 [label="yes"];
* A5 -> GCR [label="yes"];
* A5 -> GMRES [label="no"];
*
* A4 -> A6 [label="no"];
* A6 -> A8 [label="yes"];
* A6 -> A7 [label="no"];
* A7 -> BICGSTABL [label="yes"];
* A7 -> BICGSTAB [label="no"];
* A8 -> IDRSTABL [label="yes"];
* A8 -> IDRS [label="no"];
* }
* \enddot
* \code
* #include <unsupported/Eigen/IterativeSolvers>
* \endcode
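Following the flowchart, a minimal hedged sketch of the module in use: GMRES with its default preconditioner on a placeholder sparse system (the matrix contents are illustrative only).

    #include <unsupported/Eigen/IterativeSolvers>
    #include <Eigen/SparseCore>

    int main() {
      typedef Eigen::SparseMatrix<double> SpMat;
      SpMat A(100, 100);
      A.setIdentity();                       // placeholder; real code fills A from triplets
      Eigen::VectorXd b = Eigen::VectorXd::Ones(100);

      Eigen::GMRES<SpMat> solver(A);         // non-hermitian branch of the flowchart
      Eigen::VectorXd x = solver.solve(b);
      return solver.info() == Eigen::Success ? 0 : 1;
    }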
@@ -41,9 +87,10 @@
#include "src/IterativeSolvers/IncompleteLU.h"
#include "src/IterativeSolvers/GMRES.h"
#include "src/IterativeSolvers/DGMRES.h"
//#include "src/IterativeSolvers/SSORPreconditioner.h"
#include "src/IterativeSolvers/MINRES.h"
#include "src/IterativeSolvers/IDRS.h"
#include "src/IterativeSolvers/BiCGSTABL.h"
#include "src/IterativeSolvers/IDRSTABL.h"
#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"

View File

@@ -10,10 +10,7 @@
#define EIGEN_KRONECKER_PRODUCT_MODULE_H
#include "../../Eigen/Core"
#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
#include "../../Eigen/src/SparseCore/SparseUtil.h"
#include "../../Eigen/SparseCore"
namespace Eigen {

View File

@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_LEVENBERGMARQUARDT_MODULE
#define EIGEN_LEVENBERGMARQUARDT_MODULE
#ifndef EIGEN_LEVENBERGMARQUARDT_MODULE_H
#define EIGEN_LEVENBERGMARQUARDT_MODULE_H
// #include <vector>
@@ -46,4 +46,4 @@
#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_LEVENBERGMARQUARDT_MODULE
#endif // EIGEN_LEVENBERGMARQUARDT_MODULE_H

Some files were not shown because too many files have changed in this diff.